diff --git a/.cache b/.cache new file mode 120000 index 0000000000000000000000000000000000000000..310cb5b884182f2d345fc69e9a2d0ac9c97808d6 --- /dev/null +++ b/.cache @@ -0,0 +1 @@ +/home/javierr/.cache \ No newline at end of file diff --git a/.gitattributes b/.gitattributes index 755356a054893aa3dbdb5ae3fec6a00637e6e3ae..f0eef4b95ff394f9e69664d929bcd93ac1e80134 100644 --- a/.gitattributes +++ b/.gitattributes @@ -29,3 +29,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zstandard filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +*.wandb filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..c7206fcce69b9f4554e467c8f4f832150fce187d --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,4 @@ +{ + "": 40, + "": 39 +} diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..260219f2064b3570984d97f765777fa82ba04ed6 --- /dev/null +++ b/config.json @@ -0,0 +1,109 @@ +{ + "activation_dropout": 0.055, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForCTC" + ], + "attention_dropout": 0.094, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + "contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "mean", + "ctc_zero_infinity": true, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.04, + "feat_quantizer_dropout": 0.0, + "final_dropout": 
0.0, + "fuse_matmuls": false, + "gradient_checkpointing": true, + "hidden_act": "gelu", + "hidden_dropout": 0.047, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.041, + "mask_feature_length": 64, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.25, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.082, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 38, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 39, + "xvector_output_dim": 512 +} diff --git a/events.out.tfevents.1659137001.t1v-n-eedfb410-w-0.2559923.0.v2 b/events.out.tfevents.1659137001.t1v-n-eedfb410-w-0.2559923.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..93fa8b058d4ac58879a0af8ae51f420c74ebcbac --- /dev/null +++ b/events.out.tfevents.1659137001.t1v-n-eedfb410-w-0.2559923.0.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ded7b585446de921ebfe6f76acb1100484ca4c264a7b066ab3fa8904e6e4b6e6 +size 40 diff --git a/events.out.tfevents.1659168077.t1v-n-eedfb410-w-0.2047809.0.v2 b/events.out.tfevents.1659168077.t1v-n-eedfb410-w-0.2047809.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..7627616f294c3e604616556f2cb1b1cebc135977 --- /dev/null +++ b/events.out.tfevents.1659168077.t1v-n-eedfb410-w-0.2047809.0.v2 @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:a0e93ee6816853f3923399b5d527d2a61ca34ce887c28517df99c05fb20a7e6c +size 40 diff --git a/events.out.tfevents.1659169464.t1v-n-eedfb410-w-0.1065426.0.v2 b/events.out.tfevents.1659169464.t1v-n-eedfb410-w-0.1065426.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..681c54c207e4fafd43f9203c926c7aa535aed99f --- /dev/null +++ b/events.out.tfevents.1659169464.t1v-n-eedfb410-w-0.1065426.0.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e59e7c0f2f3d5c03effbc941690f4567f87bf74e6d559526bbd802d65dc2710a +size 40 diff --git a/events.out.tfevents.1659171045.t1v-n-eedfb410-w-0.86199.0.v2 b/events.out.tfevents.1659171045.t1v-n-eedfb410-w-0.86199.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..178ec4b55c6a3c400d4cb788fe67460ad2bbf6f6 --- /dev/null +++ b/events.out.tfevents.1659171045.t1v-n-eedfb410-w-0.86199.0.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f89384c846e08fa515da01d4c2459ae230a9c6702cc6ab9dc8533217d5d44e0 +size 40 diff --git a/events.out.tfevents.1659174715.t1v-n-eedfb410-w-0.3333166.0.v2 b/events.out.tfevents.1659174715.t1v-n-eedfb410-w-0.3333166.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..a7583e0765c1cd375165732a4331ce7139edbdd9 --- /dev/null +++ b/events.out.tfevents.1659174715.t1v-n-eedfb410-w-0.3333166.0.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ac1687a5731ee3d8fe81c3a3a9cad3798a3cfd2de48cf0e700947ec667e5183 +size 40 diff --git a/events.out.tfevents.1659181854.t1v-n-eedfb410-w-0.3085831.0.v2 b/events.out.tfevents.1659181854.t1v-n-eedfb410-w-0.3085831.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..1abd574967f1ff0677cdb12866af4911e4f357eb --- /dev/null +++ b/events.out.tfevents.1659181854.t1v-n-eedfb410-w-0.3085831.0.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:c744a976d0f5879c6383cdbfc1c543b24f09469a2279b2297c378547f26f08c5 +size 40 diff --git a/events.out.tfevents.1659182962.t1v-n-eedfb410-w-0.2099342.0.v2 b/events.out.tfevents.1659182962.t1v-n-eedfb410-w-0.2099342.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..3eccc090277cf26bca9f343fd708ab2f97485fdb --- /dev/null +++ b/events.out.tfevents.1659182962.t1v-n-eedfb410-w-0.2099342.0.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c58c7b4399f1690c0cf07b003a6f00eda584715b170f4b5ba7ae5694af533262 +size 40 diff --git a/events.out.tfevents.1659184651.t1v-n-eedfb410-w-0.4852.0.v2 b/events.out.tfevents.1659184651.t1v-n-eedfb410-w-0.4852.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..a59deea18087557d3c0ae5631be4ab9dea9c26d2 --- /dev/null +++ b/events.out.tfevents.1659184651.t1v-n-eedfb410-w-0.4852.0.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:711a53c830c473bac46091e9ffea0b461decbed903088bb927b1c4a9331a21f9 +size 40 diff --git a/events.out.tfevents.1659185790.t1v-n-eedfb410-w-0.3212038.0.v2 b/events.out.tfevents.1659185790.t1v-n-eedfb410-w-0.3212038.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..88ba3506d3063b9b9dc26c416e37b2849d03af6c --- /dev/null +++ b/events.out.tfevents.1659185790.t1v-n-eedfb410-w-0.3212038.0.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c9db04b6cba094a693bcea7d3e63f4af0128112bd11f0ca41d1dd0998cfc8a5 +size 40 diff --git a/events.out.tfevents.1659190065.t1v-n-eedfb410-w-0.2276371.0.v2 b/events.out.tfevents.1659190065.t1v-n-eedfb410-w-0.2276371.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..a5850b5d0e436aab17d3beac038e6add4550c2c7 --- /dev/null +++ b/events.out.tfevents.1659190065.t1v-n-eedfb410-w-0.2276371.0.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2abfac3540820545dabe683f72071edc7efb99f2d83d2bfd2a0ccd92f388ba2 +size 40 diff 
--git a/events.out.tfevents.1659203868.t1v-n-eedfb410-w-0.1493173.0.v2 b/events.out.tfevents.1659203868.t1v-n-eedfb410-w-0.1493173.0.v2 new file mode 100644 index 0000000000000000000000000000000000000000..71a830ff2ca7502829d13ab32441ccedc40799a4 --- /dev/null +++ b/events.out.tfevents.1659203868.t1v-n-eedfb410-w-0.1493173.0.v2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a908c7234714cc88dd9694ee3edfa2051d03c68e9bf8d6ca30df43ae3bdb58ef +size 40 diff --git a/flax_model.msgpack b/flax_model.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..c5466547ea09e27af43d81804418ed40885a941a --- /dev/null +++ b/flax_model.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c0a8533250d85a6e1bccebd2a781b061e35d55e0a42a09491a2f29826c05a05 +size 3850218852 diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..254251fb5739afb6a196632e07dcee2334d03ed2 --- /dev/null +++ b/models/__init__.py @@ -0,0 +1,6 @@ +from models.configuration_bart import BartConfig +from models.configuration_wav2vec2 import Wav2Vec2Config +from models.configuration_speech_encoder_decoder import SpeechEncoderDecoderConfig +from models.modeling_flax_wav2vec2 import FlaxWav2Vec2Model, FlaxWav2Vec2Module, FlaxWav2Vec2ForCTC, FlaxWav2Vec2ForCTCModule +from models.modeling_flax_bart import FlaxBartForCausalLM, FlaxBartForCausalLMModule +from models.modeling_flax_speech_encoder_decoder import FlaxSpeechEncoderDecoderModel diff --git a/models/__pycache__/__init__.cpython-38.pyc b/models/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7207e33c1cc3f1de8c15cd91c251e5c42778332c Binary files /dev/null and b/models/__pycache__/__init__.cpython-38.pyc differ diff --git a/models/__pycache__/configuration_bart.cpython-38.pyc b/models/__pycache__/configuration_bart.cpython-38.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..142557bbe55d3b6ee9e3cb7c74f0c78f8c93ff49 Binary files /dev/null and b/models/__pycache__/configuration_bart.cpython-38.pyc differ diff --git a/models/__pycache__/configuration_speech_encoder_decoder.cpython-38.pyc b/models/__pycache__/configuration_speech_encoder_decoder.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9081eba88eba38668dd664e832f0585be1ff75e3 Binary files /dev/null and b/models/__pycache__/configuration_speech_encoder_decoder.cpython-38.pyc differ diff --git a/models/__pycache__/configuration_wav2vec2.cpython-38.pyc b/models/__pycache__/configuration_wav2vec2.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cd52c2fbb1b2ab1ec86fda889821505491291fb1 Binary files /dev/null and b/models/__pycache__/configuration_wav2vec2.cpython-38.pyc differ diff --git a/models/__pycache__/modeling_flax_bart.cpython-38.pyc b/models/__pycache__/modeling_flax_bart.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..205602c2ff9def7cfbf501c35acc0f975f45b35e Binary files /dev/null and b/models/__pycache__/modeling_flax_bart.cpython-38.pyc differ diff --git a/models/__pycache__/modeling_flax_speech_encoder_decoder.cpython-38.pyc b/models/__pycache__/modeling_flax_speech_encoder_decoder.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..821aaee1f4bd6812725827fa307af3ea0789c0d9 Binary files /dev/null and b/models/__pycache__/modeling_flax_speech_encoder_decoder.cpython-38.pyc differ diff --git a/models/__pycache__/modeling_flax_wav2vec2.cpython-38.pyc b/models/__pycache__/modeling_flax_wav2vec2.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6c143adbba5f614708fe32dcd8862aeeebb48999 Binary files /dev/null and b/models/__pycache__/modeling_flax_wav2vec2.cpython-38.pyc differ diff --git a/models/configuration_bart.py b/models/configuration_bart.py new file mode 100644 
index 0000000000000000000000000000000000000000..45ab9edf6126bcfa2f73bb79e786f5ec2bff5d78 --- /dev/null +++ b/models/configuration_bart.py @@ -0,0 +1,183 @@ +# coding=utf-8 +# Copyright 2021 The Fairseq Authors and The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" BART model configuration""" +import warnings + +from transformers.configuration_utils import PretrainedConfig +from transformers.utils import logging + + +logger = logging.get_logger(__name__) + +BART_PRETRAINED_CONFIG_ARCHIVE_MAP = { + "facebook/bart-large": "https://huggingface.co/facebook/bart-large/resolve/main/config.json", + # See all BART models at https://huggingface.co/models?filter=bart +} + + +class BartConfig(PretrainedConfig): + r""" + This is the configuration class to store the configuration of a [`BartModel`]. It is used to instantiate a BART + model according to the specified arguments, defining the model architecture. Instantiating a configuration with the + defaults will yield a similar configuration to that of the BART + [facebook/bart-large](https://huggingface.co/facebook/bart-large) architecture. + + Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the + documentation from [`PretrainedConfig`] for more information. + + + Args: + vocab_size (`int`, *optional*, defaults to 50265): + Vocabulary size of the BART model. 
Defines the number of different tokens that can be represented by the + `inputs_ids` passed when calling [`BartModel`] or [`TFBartModel`]. + d_model (`int`, *optional*, defaults to 1024): + Dimensionality of the layers and the pooler layer. + encoder_layers (`int`, *optional*, defaults to 12): + Number of encoder layers. + decoder_layers (`int`, *optional*, defaults to 12): + Number of decoder layers. + encoder_attention_heads (`int`, *optional*, defaults to 16): + Number of attention heads for each attention layer in the Transformer encoder. + decoder_attention_heads (`int`, *optional*, defaults to 16): + Number of attention heads for each attention layer in the Transformer decoder. + decoder_ffn_dim (`int`, *optional*, defaults to 4096): + Dimensionality of the "intermediate" (often named feed-forward) layer in decoder. + encoder_ffn_dim (`int`, *optional*, defaults to 4096): + Dimensionality of the "intermediate" (often named feed-forward) layer in decoder. + activation_function (`str` or `function`, *optional*, defaults to `"gelu"`): + The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`, + `"relu"`, `"silu"` and `"gelu_new"` are supported. + dropout (`float`, *optional*, defaults to 0.1): + The dropout probability for all fully connected layers in the embeddings, encoder, and pooler. + attention_dropout (`float`, *optional*, defaults to 0.0): + The dropout ratio for the attention probabilities. + activation_dropout (`float`, *optional*, defaults to 0.0): + The dropout ratio for activations inside the fully connected layer. + classifier_dropout (`float`, *optional*, defaults to 0.0): + The dropout ratio for classifier. + max_position_embeddings (`int`, *optional*, defaults to 1024): + The maximum sequence length that this model might ever be used with. Typically set this to something large + just in case (e.g., 512 or 1024 or 2048). 
+ init_std (`float`, *optional*, defaults to 0.02): + The standard deviation of the truncated_normal_initializer for initializing all weight matrices. + encoder_layerdrop: (`float`, *optional*, defaults to 0.0): + The LayerDrop probability for the encoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556) + for more details. + decoder_layerdrop: (`float`, *optional*, defaults to 0.0): + The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556) + for more details. + scale_embedding (`bool`, *optional*, defaults to `False`): + Scale embeddings by diving by sqrt(d_model). + use_cache (`bool`, *optional*, defaults to `True`): + Whether or not the model should return the last key/values attentions (not used by all models). + num_labels: (`int`, *optional*, defaults to 3): + The number of labels to use in [`BartForSequenceClassification`]. + forced_eos_token_id (`int`, *optional*, defaults to 2): + The id of the token to force as the last generated token when `max_length` is reached. Usually set to + `eos_token_id`. + use_scan (`bool`, *optional*, defaults to `False`): + Whether or not to use nn.scan in the Flax Bart attention layers. 
+ + Example: + + ```python + >>> from transformers import BartModel, BartConfig + + >>> # Initializing a BART facebook/bart-large style configuration + >>> configuration = BartConfig() + + >>> # Initializing a model from the facebook/bart-large style configuration + >>> model = BartModel(configuration) + + >>> # Accessing the model configuration + >>> configuration = model.config + ```""" + model_type = "bart" + keys_to_ignore_at_inference = ["past_key_values"] + attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"} + + def __init__( + self, + vocab_size=50265, + max_position_embeddings=1024, + encoder_layers=12, + encoder_ffn_dim=4096, + encoder_attention_heads=16, + decoder_layers=12, + decoder_ffn_dim=4096, + decoder_attention_heads=16, + encoder_layerdrop=0.0, + decoder_layerdrop=0.0, + activation_function="gelu", + d_model=1024, + dropout=0.1, + attention_dropout=0.0, + activation_dropout=0.0, + init_std=0.02, + classifier_dropout=0.0, + scale_embedding=False, + use_cache=True, + use_scan=False, + fuse_matmuls=False, + num_labels=3, + pad_token_id=1, + bos_token_id=0, + eos_token_id=2, + is_encoder_decoder=True, + decoder_start_token_id=2, + forced_eos_token_id=2, + **kwargs + ): + self.vocab_size = vocab_size + self.max_position_embeddings = max_position_embeddings + self.d_model = d_model + self.encoder_ffn_dim = encoder_ffn_dim + self.encoder_layers = encoder_layers + self.encoder_attention_heads = encoder_attention_heads + self.decoder_ffn_dim = decoder_ffn_dim + self.decoder_layers = decoder_layers + self.decoder_attention_heads = decoder_attention_heads + self.dropout = dropout + self.attention_dropout = attention_dropout + self.activation_dropout = activation_dropout + self.activation_function = activation_function + self.init_std = init_std + self.encoder_layerdrop = encoder_layerdrop + self.decoder_layerdrop = decoder_layerdrop + self.classifier_dropout = classifier_dropout + self.use_cache = use_cache + 
self.use_scan = use_scan + self.fuse_matmuls = fuse_matmuls + self.num_hidden_layers = encoder_layers + self.scale_embedding = scale_embedding # scale factor will be sqrt(d_model) if True + + super().__init__( + num_labels=num_labels, + pad_token_id=pad_token_id, + bos_token_id=bos_token_id, + eos_token_id=eos_token_id, + is_encoder_decoder=is_encoder_decoder, + decoder_start_token_id=decoder_start_token_id, + forced_eos_token_id=forced_eos_token_id, + **kwargs, + ) + + # ensure backward compatibility for BART CNN models + if self.forced_bos_token_id is None and kwargs.get("force_bos_token_to_be_generated", False): + self.forced_bos_token_id = self.bos_token_id + warnings.warn( + f"Please make sure the config includes `forced_bos_token_id={self.bos_token_id}` in future versions. " + "The config can simply be saved and uploaded again to be fixed." + ) diff --git a/models/configuration_speech_encoder_decoder.py b/models/configuration_speech_encoder_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..a60be81a7088085c1d56b1d52c68f1bada283064 --- /dev/null +++ b/models/configuration_speech_encoder_decoder.py @@ -0,0 +1,121 @@ +# coding=utf-8 +# Copyright 2021 The HuggingFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import copy + +from transformers.configuration_utils import PretrainedConfig +from transformers.utils import logging +from models.configuration_wav2vec2 import Wav2Vec2Config +from models.configuration_bart import BartConfig +from transformers import AutoConfig + + +logger = logging.get_logger(__name__) + + +class SpeechEncoderDecoderConfig(PretrainedConfig): + r""" + [`SpeechEncoderDecoderConfig`] is the configuration class to store the configuration of a + [`SpeechEncoderDecoderModel`]. It is used to instantiate an Encoder Decoder model according to the specified + arguments, defining the encoder and decoder configs. + + Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the + documentation from [`PretrainedConfig`] for more information. + + Args: + kwargs (*optional*): + Dictionary of keyword arguments. Notably: + + - **encoder** ([`PretrainedConfig`], *optional*) -- An instance of a configuration object that defines + the encoder config. + - **decoder** ([`PretrainedConfig`], *optional*) -- An instance of a configuration object that defines + the decoder config. 
+ + Examples: + + ```python + >>> from transformers import BertConfig, Wav2Vec2Config, SpeechEncoderDecoderConfig, SpeechEncoderDecoderModel + + >>> # Initializing a Wav2Vec2 & BERT style configuration + >>> config_encoder = Wav2Vec2Config() + >>> config_decoder = BertConfig() + + >>> config = SpeechEncoderDecoderConfig.from_encoder_decoder_configs(config_encoder, config_decoder) + + >>> # Initializing a Wav2Vec2Bert model from a Wav2Vec2 & bert-base-uncased style configurations + >>> model = SpeechEncoderDecoderModel(config=config) + + >>> # Accessing the model configuration + >>> config_encoder = model.config.encoder + >>> config_decoder = model.config.decoder + >>> # set decoder config to causal lm + >>> config_decoder.is_decoder = True + >>> config_decoder.add_cross_attention = True + + >>> # Saving the model, including its configuration + >>> model.save_pretrained("my-model") + + >>> # loading model and config from pretrained folder + >>> encoder_decoder_config = SpeechEncoderDecoderConfig.from_pretrained("my-model") + >>> model = SpeechEncoderDecoderModel.from_pretrained("my-model", config=encoder_decoder_config) + ```""" + model_type = "speech-encoder-decoder" + is_composition = True + + def __init__(self, **kwargs): + super().__init__(**kwargs) + if "encoder" not in kwargs or "decoder" not in kwargs: + raise ValueError( + f"A configuraton of type {self.model_type} cannot be instantiated because not both `encoder` and `decoder` sub-configurations are passed, but only {kwargs}" + ) + + encoder_config = kwargs.pop("encoder") + decoder_config = kwargs.pop("decoder") + + # TODO: Load configs from AutoConfig (as done in Transformers 🤗) + self.encoder = Wav2Vec2Config(**encoder_config) + self.decoder = BartConfig(**decoder_config) + self.is_encoder_decoder = True + + @classmethod + def from_encoder_decoder_configs( + cls, encoder_config: PretrainedConfig, decoder_config: PretrainedConfig, **kwargs + ) -> PretrainedConfig: + r""" + Instantiate a 
[`SpeechEncoderDecoderConfig`] (or a derived class) from a pre-trained encoder model + configuration and decoder model configuration. + + Returns: + [`SpeechEncoderDecoderConfig`]: An instance of a configuration object + """ + logger.info("Setting `config.is_decoder=True` and `config.add_cross_attention=True` for decoder_config") + decoder_config.is_decoder = True + decoder_config.add_cross_attention = True + + return cls(encoder=encoder_config.to_dict(), decoder=decoder_config.to_dict(), **kwargs) + + def to_dict(self): + """ + Serializes this instance to a Python dictionary. Override the default *to_dict()* from *PretrainedConfig*. + + Returns: + `Dict[str, any]`: Dictionary of all the attributes that make up this configuration instance, + """ + output = copy.deepcopy(self.__dict__) + output["encoder"] = self.encoder.to_dict() + output["decoder"] = self.decoder.to_dict() + output["model_type"] = self.__class__.model_type + return output diff --git a/models/configuration_wav2vec2.py b/models/configuration_wav2vec2.py new file mode 100644 index 0000000000000000000000000000000000000000..c93bc7e6df8d69fe5d8f0dce61ab5e590f7e361f --- /dev/null +++ b/models/configuration_wav2vec2.py @@ -0,0 +1,344 @@ +# coding=utf-8 +# Copyright 2021 The Fairseq Authors and The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" Wav2Vec2 model configuration""" + +import functools +import operator + +from transformers.configuration_utils import PretrainedConfig +from transformers.utils import logging + + +logger = logging.get_logger(__name__) + +WAV_2_VEC_2_PRETRAINED_CONFIG_ARCHIVE_MAP = { + "facebook/wav2vec2-base-960h": "https://huggingface.co/facebook/wav2vec2-base-960h/resolve/main/config.json", + # See all Wav2Vec2 models at https://huggingface.co/models?filter=wav2vec2 +} + + +class Wav2Vec2Config(PretrainedConfig): + r""" + This is the configuration class to store the configuration of a [`Wav2Vec2Model`]. It is used to instantiate an + Wav2Vec2 model according to the specified arguments, defining the model architecture. Instantiating a configuration + with the defaults will yield a similar configuration to that of the Wav2Vec2 + [facebook/wav2vec2-base-960h](https://huggingface.co/facebook/wav2vec2-base-960h) architecture. + + Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the + documentation from [`PretrainedConfig`] for more information. + + + Args: + vocab_size (`int`, *optional*, defaults to 32): + Vocabulary size of the Wav2Vec2 model. Defines the number of different tokens that can be represented by + the `inputs_ids` passed when calling [`Wav2Vec2Model`] or [`TFWav2Vec2Model`]. Vocabulary size of the + model. Defines the different tokens that can be represented by the *inputs_ids* passed to the forward + method of [`Wav2Vec2Model`]. + hidden_size (`int`, *optional*, defaults to 768): + Dimensionality of the encoder layers and the pooler layer. + num_hidden_layers (`int`, *optional*, defaults to 12): + Number of hidden layers in the Transformer encoder. + num_attention_heads (`int`, *optional*, defaults to 12): + Number of attention heads for each attention layer in the Transformer encoder. 
+ intermediate_size (`int`, *optional*, defaults to 3072): + Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder. + hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`): + The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`, + `"relu"`, `"selu"` and `"gelu_new"` are supported. + hidden_dropout (`float`, *optional*, defaults to 0.1): + The dropout probability for all fully connected layers in the embeddings, encoder, and pooler. + attention_dropout (`float`, *optional*, defaults to 0.1): + The dropout ratio for the attention probabilities. + final_dropout (`float`, *optional*, defaults to 0.1): + The dropout probability for the final projection layer of [`Wav2Vec2ForCTC`]. + initializer_range (`float`, *optional*, defaults to 0.02): + The standard deviation of the truncated_normal_initializer for initializing all weight matrices. + layer_norm_eps (`float`, *optional*, defaults to 1e-12): + The epsilon used by the layer normalization layers. + feat_extract_norm (`str`, *optional*, defaults to `"group"`): + The norm to be applied to 1D convolutional layers in feature encoder. One of `"group"` for group + normalization of only the first 1D convolutional layer or `"layer"` for layer normalization of all 1D + convolutional layers. + feat_proj_dropout (`float`, *optional*, defaults to 0.0): + The dropout probability for output of the feature encoder. + feat_extract_activation (`str, `optional`, defaults to `"gelu"`): + The non-linear activation function (function or string) in the 1D convolutional layers of the feature + extractor. If string, `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported. + feat_quantizer_dropout (`float`, *optional*, defaults to 0.0): + The dropout probabilitiy for quantized feature encoder states. 
+ conv_dim (`Tuple[int]`, *optional*, defaults to `(512, 512, 512, 512, 512, 512, 512)`): + A tuple of integers defining the number of input and output channels of each 1D convolutional layer in the + feature encoder. The length of *conv_dim* defines the number of 1D convolutional layers. + conv_stride (`Tuple[int]`, *optional*, defaults to `(5, 2, 2, 2, 2, 2, 2)`): + A tuple of integers defining the stride of each 1D convolutional layer in the feature encoder. The length + of *conv_stride* defines the number of convolutional layers and has to match the length of *conv_dim*. + conv_kernel (`Tuple[int]`, *optional*, defaults to `(10, 3, 3, 3, 3, 3, 3)`): + A tuple of integers defining the kernel size of each 1D convolutional layer in the feature encoder. The + length of *conv_kernel* defines the number of convolutional layers and has to match the length of + *conv_dim*. + conv_bias (`bool`, *optional*, defaults to `False`): + Whether the 1D convolutional layers have a bias. + num_conv_pos_embeddings (`int`, *optional*, defaults to 128): + Number of convolutional positional embeddings. Defines the kernel size of 1D convolutional positional + embeddings layer. + num_conv_pos_embedding_groups (`int`, *optional*, defaults to 16): + Number of groups of 1D convolutional positional embeddings layer. + do_stable_layer_norm (`bool`, *optional*, defaults to `False`): + Whether to apply *stable* layer norm architecture of the Transformer encoder. `do_stable_layer_norm is + True` corresponds to applying layer norm before the attention layer, whereas `do_stable_layer_norm is + False` corresponds to applying layer norm after the attention layer. + apply_spec_augment (`bool`, *optional*, defaults to `True`): + Whether to apply *SpecAugment* data augmentation to the outputs of the feature encoder. For reference see + [SpecAugment: A Simple Data Augmentation Method for Automatic Speech + Recognition](https://arxiv.org/abs/1904.08779). 
+ mask_time_prob (`float`, *optional*, defaults to 0.05): + Percentage (between 0 and 1) of all feature vectors along the time axis which will be masked. The masking + procecure generates ''mask_time_prob*len(time_axis)/mask_time_length'' independent masks over the axis. If + reasoning from the propability of each feature vector to be chosen as the start of the vector span to be + masked, *mask_time_prob* should be `prob_vector_start*mask_time_length`. Note that overlap may decrease the + actual percentage of masked vectors. This is only relevant if `apply_spec_augment is True`. + mask_time_length (`int`, *optional*, defaults to 10): + Length of vector span along the time axis. + mask_time_min_masks (`int`, *optional*, defaults to 2),: + The minimum number of masks of length `mask_feature_length` generated along the time axis, each time step, + irrespectively of `mask_feature_prob`. Only relevant if ''mask_time_prob*len(time_axis)/mask_time_length < + mask_time_min_masks'' + mask_feature_prob (`float`, *optional*, defaults to 0.0): + Percentage (between 0 and 1) of all feature vectors along the feature axis which will be masked. The + masking procecure generates ''mask_feature_prob*len(feature_axis)/mask_time_length'' independent masks over + the axis. If reasoning from the propability of each feature vector to be chosen as the start of the vector + span to be masked, *mask_feature_prob* should be `prob_vector_start*mask_feature_length`. Note that overlap + may decrease the actual percentage of masked vectors. This is only relevant if `apply_spec_augment is + True`. + mask_feature_length (`int`, *optional*, defaults to 10): + Length of vector span along the feature axis. + mask_feature_min_masks (`int`, *optional*, defaults to 0),: + The minimum number of masks of length `mask_feature_length` generated along the feature axis, each time + step, irrespectively of `mask_feature_prob`. 
Only relevant if + ''mask_feature_prob*len(feature_axis)/mask_feature_length < mask_feature_min_masks'' + num_codevectors_per_group (`int`, *optional*, defaults to 320): + Number of entries in each quantization codebook (group). + num_codevector_groups (`int`, *optional*, defaults to 2): + Number of codevector groups for product codevector quantization. + contrastive_logits_temperature (`float`, *optional*, defaults to 0.1): + The temperature *kappa* in the contrastive loss. + feat_quantizer_dropout (`float`, *optional*, defaults to 0.0): + The dropout probability for the output of the feature encoder that's used by the quantizer. + num_negatives (`int`, *optional*, defaults to 100): + Number of negative samples for the contrastive loss. + codevector_dim (`int`, *optional*, defaults to 256): + Dimensionality of the quantized feature vectors. + proj_codevector_dim (`int`, *optional*, defaults to 256): + Dimensionality of the final projection of both the quantized and the transformer features. + diversity_loss_weight (`float`, *optional*, defaults to 0.1): + The weight of the codebook diversity loss component. + ctc_loss_reduction (`str`, *optional*, defaults to `"sum"`): + Specifies the reduction to apply to the output of `torch.nn.CTCLoss`. Only relevant when training an + instance of [`Wav2Vec2ForCTC`]. + ctc_zero_infinity (`bool`, *optional*, defaults to `False`): + Whether to zero infinite losses and the associated gradients of `torch.nn.CTCLoss`. Infinite losses mainly + occur when the inputs are too short to be aligned to the targets. Only relevant when training an instance + of [`Wav2Vec2ForCTC`]. + use_weighted_layer_sum (`bool`, *optional*, defaults to `False`): + Whether to use a weighted average of layer outputs with learned weights. Only relevant when using an + instance of [`Wav2Vec2ForSequenceClassification`]. + classifier_proj_size (`int`, *optional*, defaults to 256): + Dimensionality of the projection before token mean-pooling for classification. 
+ tdnn_dim (`Tuple[int]`, *optional*, defaults to `(512, 512, 512, 512, 1500)`): + A tuple of integers defining the number of output channels of each 1D convolutional layer in the *TDNN* + module of the *XVector* model. The length of *tdnn_dim* defines the number of *TDNN* layers. + tdnn_kernel (`Tuple[int]`, *optional*, defaults to `(5, 3, 3, 1, 1)`): + A tuple of integers defining the kernel size of each 1D convolutional layer in the *TDNN* module of the + *XVector* model. The length of *tdnn_kernel* has to match the length of *tdnn_dim*. + tdnn_dilation (`Tuple[int]`, *optional*, defaults to `(1, 2, 3, 1, 1)`): + A tuple of integers defining the dilation factor of each 1D convolutional layer in *TDNN* module of the + *XVector* model. The length of *tdnn_dilation* has to match the length of *tdnn_dim*. + xvector_output_dim (`int`, *optional*, defaults to 512): + Dimensionality of the *XVector* embedding vectors. + add_adapter (`bool`, *optional*, defaults to `False`): + Whether a convolutional network should be stacked on top of the Wav2Vec2 Encoder. Can be very useful for + warm-starting Wav2Vec2 for SpeechEncoderDecoder models. + adapter_kernel_size (`int`, *optional*, defaults to 3): + Kernel size of the convolutional layers in the adapter network. Only relevant if `add_adapter is True`. + adapter_stride (`int`, *optional*, defaults to 2): + Stride of the convolutional layers in the adapter network. Only relevant if `add_adapter is True`. + num_adapter_layers (`int`, *optional*, defaults to 3): + Number of convolutional layers that should be used in the adapter network. Only relevant if `add_adapter is + True`. + output_hidden_size (`int`, *optional*): + Dimensionality of the encoder output layer. If not defined, this defaults to *hidden-size*. Only relevant + if `add_adapter is True`. + use_scan (`bool`, *optional*, defaults to `False`): + Whether or not to use nn.scan in the Flax Wav2Vec2 transformer layers. 
+ + Example: + + ```python + >>> from transformers import Wav2Vec2Model, Wav2Vec2Config + + >>> # Initializing a Wav2Vec2 facebook/wav2vec2-base-960h style configuration + >>> configuration = Wav2Vec2Config() + + >>> # Initializing a model from the facebook/wav2vec2-base-960h style configuration + >>> model = Wav2Vec2Model(configuration) + + >>> # Accessing the model configuration + >>> configuration = model.config + ```""" + model_type = "wav2vec2" + + def __init__( + self, + vocab_size=32, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout=0.1, + activation_dropout=0.1, + attention_dropout=0.1, + feat_proj_dropout=0.0, + feat_quantizer_dropout=0.0, + final_dropout=0.1, + layerdrop=0.1, + initializer_range=0.02, + layer_norm_eps=1e-5, + feat_extract_norm="group", + feat_extract_activation="gelu", + conv_dim=(512, 512, 512, 512, 512, 512, 512), + conv_stride=(5, 2, 2, 2, 2, 2, 2), + conv_kernel=(10, 3, 3, 3, 3, 2, 2), + conv_bias=False, + num_conv_pos_embeddings=128, + num_conv_pos_embedding_groups=16, + do_stable_layer_norm=False, + apply_spec_augment=True, + mask_time_prob=0.05, + mask_time_length=10, + mask_time_min_masks=2, + mask_feature_prob=0.0, + mask_feature_length=10, + mask_feature_min_masks=0, + num_codevectors_per_group=320, + num_codevector_groups=2, + contrastive_logits_temperature=0.1, + num_negatives=100, + codevector_dim=256, + proj_codevector_dim=256, + diversity_loss_weight=0.1, + ctc_loss_reduction="sum", + ctc_zero_infinity=False, + use_weighted_layer_sum=False, + classifier_proj_size=256, + tdnn_dim=(512, 512, 512, 512, 1500), + tdnn_kernel=(5, 3, 3, 1, 1), + tdnn_dilation=(1, 2, 3, 1, 1), + xvector_output_dim=512, + pad_token_id=0, + bos_token_id=1, + eos_token_id=2, + add_adapter=False, + adapter_kernel_size=3, + adapter_stride=2, + num_adapter_layers=3, + output_hidden_size=None, + use_scan=False, + fuse_matmuls=False, + **kwargs + ): + 
super().__init__(**kwargs, pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id) + self.hidden_size = hidden_size + self.feat_extract_norm = feat_extract_norm + self.feat_extract_activation = feat_extract_activation + self.conv_dim = list(conv_dim) + self.conv_stride = list(conv_stride) + self.conv_kernel = list(conv_kernel) + self.conv_bias = conv_bias + self.num_conv_pos_embeddings = num_conv_pos_embeddings + self.num_conv_pos_embedding_groups = num_conv_pos_embedding_groups + self.num_feat_extract_layers = len(self.conv_dim) + self.num_hidden_layers = num_hidden_layers + self.intermediate_size = intermediate_size + self.hidden_act = hidden_act + self.num_attention_heads = num_attention_heads + self.hidden_dropout = hidden_dropout + self.attention_dropout = attention_dropout + self.activation_dropout = activation_dropout + self.feat_proj_dropout = feat_proj_dropout + self.final_dropout = final_dropout + self.layerdrop = layerdrop + self.layer_norm_eps = layer_norm_eps + self.initializer_range = initializer_range + self.vocab_size = vocab_size + self.do_stable_layer_norm = do_stable_layer_norm + self.use_weighted_layer_sum = use_weighted_layer_sum + self.use_scan = use_scan + self.fuse_matmuls = fuse_matmuls + + if ( + (len(self.conv_stride) != self.num_feat_extract_layers) + or (len(self.conv_kernel) != self.num_feat_extract_layers) + or (len(self.conv_dim) != self.num_feat_extract_layers) + ): + raise ValueError( + "Configuration for convolutional layers is incorrect. " + "It is required that `len(config.conv_dim)` == `len(config.conv_stride)` == `len(config.conv_kernel)`, " + f"but is `len(config.conv_dim) = {len(self.conv_dim)}`, `len(config.conv_stride) " + f"= {len(self.conv_stride)}`, `len(config.conv_kernel) = {len(self.conv_kernel)}`." 
+ ) + + # fine-tuning config parameters for SpecAugment: https://arxiv.org/abs/1904.08779 + self.apply_spec_augment = apply_spec_augment + self.mask_time_prob = mask_time_prob + self.mask_time_length = mask_time_length + self.mask_time_min_masks = mask_time_min_masks + self.mask_feature_prob = mask_feature_prob + self.mask_feature_length = mask_feature_length + self.mask_feature_min_masks = mask_feature_min_masks + + # parameters for pretraining with codevector quantized representations + self.num_codevectors_per_group = num_codevectors_per_group + self.num_codevector_groups = num_codevector_groups + self.contrastive_logits_temperature = contrastive_logits_temperature + self.feat_quantizer_dropout = feat_quantizer_dropout + self.num_negatives = num_negatives + self.codevector_dim = codevector_dim + self.proj_codevector_dim = proj_codevector_dim + self.diversity_loss_weight = diversity_loss_weight + + # ctc loss + self.ctc_loss_reduction = ctc_loss_reduction + self.ctc_zero_infinity = ctc_zero_infinity + + # adapter + self.add_adapter = add_adapter + self.adapter_kernel_size = adapter_kernel_size + self.adapter_stride = adapter_stride + self.num_adapter_layers = num_adapter_layers + self.output_hidden_size = output_hidden_size or hidden_size + + # SequenceClassification-specific parameter. Feel free to ignore for other classes. + self.classifier_proj_size = classifier_proj_size + + # XVector-specific parameters. Feel free to ignore for other classes. 
+ self.tdnn_dim = list(tdnn_dim) + self.tdnn_kernel = list(tdnn_kernel) + self.tdnn_dilation = list(tdnn_dilation) + self.xvector_output_dim = xvector_output_dim + + @property + def inputs_to_logits_ratio(self): + return functools.reduce(operator.mul, self.conv_stride, 1) diff --git a/models/modeling_flax_bart.py b/models/modeling_flax_bart.py new file mode 100644 index 0000000000000000000000000000000000000000..08f77b24bf09be356713bbbc77955ec5d1da3b1b --- /dev/null +++ b/models/modeling_flax_bart.py @@ -0,0 +1,816 @@ +# coding=utf-8 +# Copyright 2021 The Fairseq Authors and The Google Flax Team Authors And The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" Flax Bart model.""" + +import math +import random +from functools import partial +from typing import Optional, Tuple + +import numpy as np + +import flax.linen as nn +import jax +import jax.numpy as jnp +from flax.core.frozen_dict import FrozenDict, unfreeze +from flax.linen import combine_masks, make_causal_mask +from flax.linen import partitioning as nn_partitioning +from flax.linen.attention import dot_product_attention_weights +from jax import lax +from jax.random import PRNGKey + +from transformers.modeling_flax_outputs import ( + FlaxBaseModelOutputWithPastAndCrossAttentions, + FlaxCausalLMOutputWithCrossAttentions, +) +from transformers.modeling_flax_utils import ACT2FN, FlaxPreTrainedModel + +from models import BartConfig + + +scan_with_axes = nn_partitioning.scan_with_axes +remat = nn_partitioning.remat + + +def shift_tokens_right(input_ids: np.array, pad_token_id: int, decoder_start_token_id: int) -> np.ndarray: + """ + Shift input ids one token to the right. + """ + shifted_input_ids = np.zeros_like(input_ids) + shifted_input_ids[:, 1:] = input_ids[:, :-1] + shifted_input_ids[:, 0] = decoder_start_token_id + + shifted_input_ids = np.where(shifted_input_ids == -100, pad_token_id, shifted_input_ids) + return shifted_input_ids + + +class FlaxBartAttention(nn.Module): + config: BartConfig + embed_dim: int + num_heads: int + dropout: float = 0.0 + causal: bool = False + bias: bool = True + dtype: jnp.dtype = jnp.float32 # the dtype of the computation + + def setup(self) -> None: + self.head_dim = self.embed_dim // self.num_heads + if self.head_dim * self.num_heads != self.embed_dim: + raise ValueError( + f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}" + f" and `num_heads`: {self.num_heads})." 
) + + dense = partial( + nn.Dense, + self.embed_dim, + use_bias=self.bias, + dtype=self.dtype, + kernel_init=jax.nn.initializers.normal(self.config.init_std), + ) + + self.q_proj, self.k_proj, self.v_proj = dense(), dense(), dense() + + self.fused_proj = nn.Dense( + self.embed_dim * 3, + use_bias=self.bias, + dtype=self.dtype, + kernel_init=jax.nn.initializers.normal(self.config.init_std), + ) + + self.fused_key_value = nn.Dense( + self.embed_dim * 2, + use_bias=self.bias, + dtype=self.dtype, + kernel_init=jax.nn.initializers.normal(self.config.init_std), + ) + + self.out_proj = dense() + + self.dropout_layer = nn.Dropout(rate=self.dropout) + + if self.causal: + self.causal_mask = make_causal_mask( + jnp.ones((1, self.config.max_position_embeddings), dtype="bool"), dtype="bool" + ) + + def _split_heads(self, hidden_states): + return hidden_states.reshape(hidden_states.shape[:2] + (self.num_heads, self.head_dim)) + + def _merge_heads(self, hidden_states): + return hidden_states.reshape(hidden_states.shape[:2] + (self.embed_dim,)) + + @nn.compact + def _concatenate_to_cache(self, key, value, query, attention_mask): + """ + This function takes projected key, value states from a single input token and concatenates the states to cached + states from previous steps. This function is slightly adapted from the official Flax repository: + https://github.com/google/flax/blob/491ce18759622506588784b4fca0e4bf05f8c8cd/flax/linen/attention.py#L252 + """ + # detect if we're initializing by absence of existing cache data. 
+ is_initialized = self.has_variable("cache", "cached_key") + cached_key = self.variable("cache", "cached_key", jnp.zeros, key.shape, key.dtype) + cached_value = self.variable("cache", "cached_value", jnp.zeros, value.shape, value.dtype) + cache_index = self.variable("cache", "cache_index", lambda: jnp.array(0, dtype=jnp.int32)) + + if is_initialized: + *batch_dims, max_length, num_heads, depth_per_head = cached_key.value.shape + # update key, value caches with our new 1d spatial slices + cur_index = cache_index.value + indices = (0,) * len(batch_dims) + (cur_index, 0, 0) + key = lax.dynamic_update_slice(cached_key.value, key, indices) + value = lax.dynamic_update_slice(cached_value.value, value, indices) + cached_key.value = key + cached_value.value = value + num_updated_cache_vectors = query.shape[1] + cache_index.value = cache_index.value + num_updated_cache_vectors + # causal mask for cached decoder self-attention: our single query position should only attend to those key positions that have already been generated and cached, not the remaining zero elements. 
+ pad_mask = jnp.broadcast_to( + jnp.arange(max_length) < cur_index + num_updated_cache_vectors, + tuple(batch_dims) + (1, num_updated_cache_vectors, max_length), + ) + attention_mask = combine_masks(pad_mask, attention_mask) + return key, value, attention_mask + + def __call__( + self, + hidden_states: jnp.ndarray, + key_value_states: Optional[jnp.ndarray] = None, + attention_mask: Optional[jnp.ndarray] = None, + init_cache: bool = False, + deterministic: bool = True, + ) -> Tuple[jnp.ndarray]: + """Input shape: Batch x Time x Channel""" + + # if key_value_states are provided this layer is used as a cross-attention layer + # for the decoder + is_cross_attention = key_value_states is not None + batch_size = hidden_states.shape[0] + + if self.config.fuse_matmuls: + # get key, value proj + if is_cross_attention: + # get query proj + query_states = self.q_proj(hidden_states) + # cross_attentions + attention_states = self.fused_key_value(key_value_states) + key_states, value_states = jnp.split(attention_states, 2, axis=-1) + else: + attention_states = self.fused_proj(hidden_states) + query_states, key_states, value_states = jnp.split(attention_states, 3, axis=-1) + + else: + # get query proj + query_states = self.q_proj(hidden_states) + # get key, value proj + if is_cross_attention: + # cross_attentions + key_states = self.k_proj(key_value_states) + value_states = self.v_proj(key_value_states) + else: + # self_attention + key_states = self.k_proj(hidden_states) + value_states = self.v_proj(hidden_states) + + query_states = self._split_heads(query_states) + key_states = self._split_heads(key_states) + value_states = self._split_heads(value_states) + + # handle cache prepare causal attention mask + if self.causal: + query_length, key_length = query_states.shape[1], key_states.shape[1] + if self.has_variable("cache", "cached_key"): + mask_shift = self.variables["cache"]["cache_index"] + max_decoder_length = self.variables["cache"]["cached_key"].shape[1] + causal_mask = 
lax.dynamic_slice( + self.causal_mask, (0, 0, mask_shift, 0), (1, 1, query_length, max_decoder_length) + ) + else: + causal_mask = self.causal_mask[:, :, :query_length, :key_length] + causal_mask = jnp.broadcast_to(causal_mask, (batch_size,) + causal_mask.shape[1:]) + + # combine masks if needed + if attention_mask is not None and self.causal: + attention_mask = jnp.broadcast_to(jnp.expand_dims(attention_mask, axis=(-3, -2)), causal_mask.shape) + attention_mask = combine_masks(attention_mask, causal_mask) + elif self.causal: + attention_mask = causal_mask + elif attention_mask is not None: + attention_mask = jnp.expand_dims(attention_mask, axis=(-3, -2)) + + # During fast autoregressive decoding, we feed one position at a time, + # and cache the keys and values step by step. + if self.causal and (self.has_variable("cache", "cached_key") or init_cache): + key_states, value_states, attention_mask = self._concatenate_to_cache( + key_states, value_states, query_states, attention_mask + ) + + # Convert the boolean attention mask to an attention bias. 
+ if attention_mask is not None: + # attention mask in the form of attention bias + attention_bias = lax.select( + attention_mask > 0, + jnp.full(attention_mask.shape, 0.0).astype(self.dtype), + jnp.full(attention_mask.shape, float("-inf")).astype(self.dtype), + ) + else: + attention_bias = None + + dropout_rng = None + if not deterministic and self.dropout > 0.0: + dropout_rng = self.make_rng("dropout") + + attn_weights = dot_product_attention_weights( + query_states, + key_states, + bias=attention_bias, + dropout_rng=dropout_rng, + dropout_rate=self.dropout, + broadcast_dropout=True, + deterministic=deterministic, + dtype=self.dtype, + precision=None, + ) + + attn_output = jnp.einsum("...hqk,...khd->...qhd", attn_weights, value_states) + attn_output = self._merge_heads(attn_output) + attn_output = self.out_proj(attn_output) + + return attn_output, attn_weights + + +class FlaxBartDecoderLayer(nn.Module): + config: BartConfig + dtype: jnp.dtype = jnp.float32 + + def setup(self) -> None: + self.embed_dim = self.config.d_model + self.self_attn = FlaxBartAttention( + config=self.config, + embed_dim=self.embed_dim, + num_heads=self.config.decoder_attention_heads, + dropout=self.config.attention_dropout, + causal=True, + dtype=self.dtype, + ) + self.dropout_layer = nn.Dropout(rate=self.config.dropout) + self.activation_fn = ACT2FN[self.config.activation_function] + self.activation_dropout_layer = nn.Dropout(rate=self.config.activation_dropout) + + self.self_attn_layer_norm = nn.LayerNorm(dtype=self.dtype, epsilon=1e-05) + self.encoder_attn = FlaxBartAttention( + config=self.config, + embed_dim=self.embed_dim, + num_heads=self.config.decoder_attention_heads, + dropout=self.config.attention_dropout, + dtype=self.dtype, + ) + self.encoder_attn_layer_norm = nn.LayerNorm(dtype=self.dtype, epsilon=1e-05) + self.fc1 = nn.Dense( + self.config.encoder_ffn_dim, + dtype=self.dtype, + kernel_init=jax.nn.initializers.normal(self.config.init_std), + ) + self.fc2 = nn.Dense( + 
self.embed_dim, dtype=self.dtype, kernel_init=jax.nn.initializers.normal(self.config.init_std) + ) + self.final_layer_norm = nn.LayerNorm(dtype=self.dtype, epsilon=1e-05) + + def __call__( + self, + hidden_states: jnp.ndarray, + attention_mask: jnp.ndarray, + encoder_hidden_states: Optional[jnp.ndarray] = None, + encoder_attention_mask: Optional[jnp.ndarray] = None, + init_cache: bool = False, + output_attentions: bool = True, + deterministic: bool = True, + ) -> Tuple[jnp.ndarray]: + + if self.config.use_scan: + hidden_states = hidden_states[0] + + residual = hidden_states + + # Self Attention + hidden_states, self_attn_weights = self.self_attn( + hidden_states=hidden_states, attention_mask=attention_mask, init_cache=init_cache + ) + hidden_states = self.dropout_layer(hidden_states, deterministic=deterministic) + hidden_states = residual + hidden_states + hidden_states = self.self_attn_layer_norm(hidden_states) + + # Cross-Attention Block + cross_attn_weights = None + if encoder_hidden_states is not None: + residual = hidden_states + + hidden_states, cross_attn_weights = self.encoder_attn( + hidden_states=hidden_states, + key_value_states=encoder_hidden_states, + attention_mask=encoder_attention_mask, + ) + hidden_states = self.dropout_layer(hidden_states, deterministic=deterministic) + hidden_states = residual + hidden_states + hidden_states = self.encoder_attn_layer_norm(hidden_states) + + # Fully Connected + residual = hidden_states + hidden_states = self.activation_fn(self.fc1(hidden_states)) + hidden_states = self.activation_dropout_layer(hidden_states, deterministic=deterministic) + hidden_states = self.fc2(hidden_states) + hidden_states = self.dropout_layer(hidden_states, deterministic=deterministic) + hidden_states = residual + hidden_states + hidden_states = self.final_layer_norm(hidden_states) + + outputs = (hidden_states,) + + if output_attentions: + outputs += (self_attn_weights, cross_attn_weights) + + if self.config.use_scan: + outputs = (outputs, 
None) + + return outputs + + +class FlaxBartDecoderLayerCollection(nn.Module): + config: BartConfig + dtype: jnp.dtype = jnp.float32 # the dtype of the computation + + @nn.compact + def __call__( + self, + hidden_states, + attention_mask, + encoder_hidden_states: Optional[jnp.ndarray] = None, + encoder_attention_mask: Optional[jnp.ndarray] = None, + deterministic: bool = True, + init_cache: bool = False, + output_attentions: bool = False, + output_hidden_states: bool = False, + return_dict: bool = True, + ): + # decoder layers + all_hidden_states = () if output_hidden_states else None + all_self_attns = () if output_attentions else None + all_cross_attentions = () if (output_attentions and encoder_hidden_states is not None) else None + + num_decoder_layers = self.config.decoder_layers + BlockDecoderLayer = ( + remat( + FlaxBartDecoderLayer, + static_argnums=(4, 5, 6), + prevent_cse=not self.config.use_scan, + ) + if self.config.gradient_checkpointing + else FlaxBartDecoderLayer + ) + + if self.config.use_scan: + # since all decoder layers are the same, we use nn.scan directly + assert not output_attentions, "cannot use `scan` with `output_attentions` set to `True`" + assert not output_hidden_states, "cannot use `scan` with `output_hidden_states` set to `True`" + hidden_states = (hidden_states,) + + # TODO: add layerdrop in checkpointed scan (note: default value for layerdrop in config is zero) + hidden_states, _ = scan_with_axes( + BlockDecoderLayer, + variable_axes={"params": 0, "cache": 0}, + split_rngs={"params": True, "dropout": True}, + in_axes=(nn.broadcast, nn.broadcast, nn.broadcast, nn.broadcast, nn.broadcast, nn.broadcast), + length=num_decoder_layers, + )(self.config, dtype=self.dtype, name="FlaxBartDecoderLayers")( + hidden_states, + attention_mask, + encoder_hidden_states, + encoder_attention_mask, + init_cache, + output_attentions, + deterministic, + ) + hidden_states = hidden_states[0] + + else: + for layer in range(num_decoder_layers): + if 
output_hidden_states: + all_hidden_states += (hidden_states,) + # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) + dropout_probability = random.uniform(0, 1) + if not deterministic and (dropout_probability < self.config.decoder_layerdrop): + layer_outputs = (None, None, None) + else: + layer_outputs = BlockDecoderLayer(self.config, dtype=self.dtype, name=str(layer),)( + hidden_states, + attention_mask, + encoder_hidden_states, + encoder_attention_mask, + init_cache, + output_attentions, + deterministic, + ) + + hidden_states = layer_outputs[0] + if output_attentions: + all_self_attns += (layer_outputs[1],) + + if encoder_hidden_states is not None: + all_cross_attentions += (layer_outputs[2],) + + # add hidden states from the last decoder layer + if output_hidden_states: + all_hidden_states += (hidden_states,) + + outputs = [hidden_states, all_hidden_states, all_self_attns, all_cross_attentions] + + if not return_dict: + return tuple(v for v in outputs if v is not None) + + return FlaxBaseModelOutputWithPastAndCrossAttentions( + last_hidden_state=hidden_states, + hidden_states=all_hidden_states, + attentions=all_self_attns, + cross_attentions=all_cross_attentions, + ) + + +class FlaxBartDecoder(nn.Module): + config: BartConfig + embed_tokens: nn.Embed + dtype: jnp.dtype = jnp.float32 # the dtype of the computation + + def setup(self): + self.dropout_layer = nn.Dropout(rate=self.config.dropout) + + embed_dim = self.config.d_model + self.padding_idx = self.config.pad_token_id + self.max_target_positions = self.config.max_position_embeddings + self.embed_scale = math.sqrt(self.config.d_model) if self.config.scale_embedding else 1.0 + + # Bart is set up so that if padding_idx is specified then offset the embedding ids by 2 + # and adjust num_embeddings appropriately. 
Other models don't have this hack + self.offset = 2 + self.embed_positions = nn.Embed( + self.config.max_position_embeddings + self.offset, + embed_dim, + embedding_init=jax.nn.initializers.normal(self.config.init_std), + ) + + self.layers = FlaxBartDecoderLayerCollection(self.config, self.dtype) + self.layernorm_embedding = nn.LayerNorm(dtype=self.dtype, epsilon=1e-05) + + def __call__( + self, + input_ids, + attention_mask, + position_ids, + encoder_hidden_states: Optional[jnp.ndarray] = None, + encoder_attention_mask: Optional[jnp.ndarray] = None, + init_cache: bool = False, + output_attentions: bool = False, + output_hidden_states: bool = False, + return_dict: bool = True, + deterministic: bool = True, + ): + input_shape = input_ids.shape + input_ids = input_ids.reshape(-1, input_shape[-1]) + + inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale + + # embed positions + positions = self.embed_positions(position_ids + self.offset) + + hidden_states = inputs_embeds + positions + hidden_states = self.layernorm_embedding(hidden_states) + + hidden_states = self.dropout_layer(hidden_states, deterministic=deterministic) + + outputs = self.layers( + hidden_states, + attention_mask, + encoder_hidden_states, + encoder_attention_mask, + deterministic=deterministic, + init_cache=init_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + if not return_dict: + return outputs + + return FlaxBaseModelOutputWithPastAndCrossAttentions( + last_hidden_state=outputs.last_hidden_state, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + cross_attentions=outputs.cross_attentions, + ) + + +class FlaxBartDecoderPreTrainedModel(FlaxPreTrainedModel): + config_class = BartConfig + base_model_prefix: str = "model" + module_class: nn.Module = None + + def __init__( + self, + config: BartConfig, + input_shape: Tuple[int] = (1, 1), + seed: int = 0, + dtype: jnp.dtype = jnp.float32, + 
_do_init: bool = True, + **kwargs + ): + config.is_decoder = True + config.is_encoder_decoder = False + module = self.module_class(config=config, dtype=dtype, **kwargs) + super().__init__(config, module, input_shape=input_shape, seed=seed, dtype=dtype, _do_init=_do_init) + + def init_weights(self, rng: jax.random.PRNGKey, input_shape: Tuple) -> FrozenDict: + # init input tensors + input_ids = jnp.zeros(input_shape, dtype="i4") + attention_mask = jnp.ones_like(input_ids) + + batch_size, sequence_length = input_ids.shape + position_ids = jnp.broadcast_to(jnp.arange(sequence_length)[None, :], (batch_size, sequence_length)) + + params_rng, dropout_rng = jax.random.split(rng) + rngs = {"params": params_rng, "dropout": dropout_rng} + encoder_hidden_states = jnp.zeros(input_shape + (self.config.d_model,)) + encoder_attention_mask = attention_mask + module_init_outputs = self.module.init( + rngs, + input_ids, + attention_mask, + position_ids, + encoder_hidden_states, + encoder_attention_mask, + return_dict=False, + ) + return module_init_outputs["params"] + + def init_cache(self, batch_size, max_length): + r""" + Args: + batch_size (`int`): + batch_size used for fast auto-regressive decoding. Defines the batch size of the initialized cache. + max_length (`int`): + maximum possible length for auto-regressive decoding. Defines the sequence length of the initialized + cache. 
+ """ + # init input variables to retrieve cache + input_ids = jnp.ones((batch_size, max_length), dtype="i4") + attention_mask = jnp.ones_like(input_ids, dtype="i4") + position_ids = jnp.broadcast_to(jnp.arange(jnp.atleast_2d(input_ids).shape[-1]), input_ids.shape) + + init_variables = self.module.init( + jax.random.PRNGKey(0), input_ids, attention_mask, position_ids, return_dict=False, init_cache=True + ) + return unfreeze(init_variables["cache"]) + + def __call__( + self, + input_ids: jnp.ndarray, + attention_mask: Optional[jnp.ndarray] = None, + position_ids: Optional[jnp.ndarray] = None, + encoder_hidden_states: Optional[jnp.ndarray] = None, + encoder_attention_mask: Optional[jnp.ndarray] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + train: bool = False, + params: dict = None, + past_key_values: dict = None, + dropout_rng: PRNGKey = None, + ): + """ + Args: + input_ids (`jnp.ndarray` of shape `(target_batch_size, target_sequence_length)`): + Indices of decoder input sequence tokens in the vocabulary. + + Indices can be obtained using [`BartTokenizer`]. See [`PreTrainedTokenizer.encode`] and + [`PreTrainedTokenizer.__call__`] for details. + + [What are decoder input IDs?](../glossary#decoder-input-ids) + + For translation and summarization training, `decoder_input_ids` should be provided. If no + `decoder_input_ids` is provided, the model will create this tensor by shifting the `input_ids` to the right + for denoising pre-training following the paper. + attention_mask (`jnp.ndarray` of shape `(target_batch_size, target_sequence_length)`, *optional*): + Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also + be used by default. + + If you want to change padding behavior, you should modify to your needs. See diagram 1 in [the + paper](https://arxiv.org/abs/1910.13461) for more information on the default strategy. 
+ position_ids (`numpy.ndarray` of shape `(target_batch_size, sequence_length)`, *optional*): + Indices of positions of each decoder input sequence tokens in the position embeddings. Selected in the + range `[0, config.max_position_embeddings - 1]`. + encoder_hidden_states (`jnp.ndarray` of shape `(batch_size, sequence_length, hidden_size)`): + A sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention of the decoder. + encoder_attention_mask (`jnp.ndarray` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + [What are attention masks?](../glossary#attention-mask) + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned + tensors for more detail. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for + more detail. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. + past_key_values (`Dict[str, np.ndarray]`, *optional*, returned by `init_cache` or when passing previous `past_key_values`): + Dictionary of pre-computed hidden-states (key and values in the attention blocks) that can be used for fast + auto-regressive decoding. Pre-computed key and value hidden-states are of shape *[batch_size, max_length]*. 
+ """ + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.return_dict + + if encoder_hidden_states is not None and encoder_attention_mask is None: + batch_size, sequence_length = encoder_hidden_states.shape[:2] + encoder_attention_mask = jnp.ones((batch_size, sequence_length)) + + # prepare decoder inputs + if attention_mask is None: + attention_mask = jnp.ones_like(input_ids) + if position_ids is None: + batch_size, sequence_length = input_ids.shape + position_ids = jnp.broadcast_to(jnp.arange(sequence_length)[None, :], (batch_size, sequence_length)) + + # Handle any PRNG if needed + rngs = {"dropout": dropout_rng} if dropout_rng is not None else {} + + inputs = {"params": params or self.params} + + # if past_key_values are passed then cache is already initialized a private flag init_cache has to be passed + # down to ensure cache is used. 
It has to be made sure that cache is marked as mutable so that it can be + # changed by FlaxBartAttention module + if past_key_values: + inputs["cache"] = past_key_values + mutable = ["cache"] + else: + mutable = False + + outputs = self.module.apply( + inputs, + input_ids=jnp.array(input_ids, dtype="i4"), + attention_mask=jnp.array(attention_mask, dtype="i4"), + position_ids=jnp.array(position_ids, dtype="i4"), + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_attention_mask, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + deterministic=not train, + rngs=rngs, + mutable=mutable, + ) + + # add updated cache to model output + if past_key_values is not None and return_dict: + outputs, past_key_values = outputs + outputs["past_key_values"] = unfreeze(past_key_values["cache"]) + return outputs + elif past_key_values is not None and not return_dict: + outputs, past_key_values = outputs + outputs = outputs[:1] + (unfreeze(past_key_values["cache"]),) + outputs[1:] + + return outputs + + +class FlaxBartDecoderWrapper(nn.Module): + """ + This wrapper class is a helper class to correctly load pretrained checkpoints when the causal language model is + used in combination with the [`EncoderDecoderModel`] framework. + """ + + config: BartConfig + dtype: jnp.dtype = jnp.float32 + + def setup(self): + embed_dim = self.config.d_model + embed_tokens = nn.Embed( + self.config.vocab_size, + embed_dim, + embedding_init=jax.nn.initializers.normal(self.config.init_std), + ) + self.decoder = FlaxBartDecoder(config=self.config, embed_tokens=embed_tokens, dtype=self.dtype) + + def __call__(self, *args, **kwargs): + return self.decoder(*args, **kwargs) + + +class FlaxBartForCausalLMModule(nn.Module): + """Bart Decoder Module with a language modeling head on top (linear layer with weights tied to the input embeddings) + e.g. for autoregressive tasks. 
+ """ + + config: BartConfig + dtype: jnp.dtype = jnp.float32 + + def setup(self): + self.model = FlaxBartDecoderWrapper(config=self.config, dtype=self.dtype) + self.lm_head = nn.Dense( + self.config.vocab_size, + use_bias=False, + dtype=self.dtype, + kernel_init=jax.nn.initializers.normal(self.config.init_std), + ) + + def __call__( + self, + input_ids, + attention_mask, + position_ids, + encoder_hidden_states: Optional[jnp.ndarray] = None, + encoder_attention_mask: Optional[jnp.ndarray] = None, + init_cache: bool = False, + output_attentions: bool = False, + output_hidden_states: bool = False, + return_dict: bool = True, + deterministic: bool = True, + ): + + outputs = self.model( + input_ids, + attention_mask, + position_ids, + encoder_hidden_states, + encoder_attention_mask, + deterministic=deterministic, + init_cache=init_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + hidden_states = outputs[0] + + if self.config.tie_word_embeddings: + shared_embedding = self.model.variables["params"]["decoder"]["embed_tokens"]["embedding"] + lm_logits = self.lm_head.apply({"params": {"kernel": shared_embedding.T}}, hidden_states) + else: + lm_logits = self.lm_head(hidden_states) + + if not return_dict: + return (lm_logits,) + outputs[1:] + + return FlaxCausalLMOutputWithCrossAttentions( + logits=lm_logits, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + cross_attentions=outputs.cross_attentions, + ) + + +class FlaxBartForCausalLM(FlaxBartDecoderPreTrainedModel): + """Bart Decoder Model with a language modeling head on top (linear layer with weights tied to the input embeddings) + e.g. for autoregressive tasks. 
+ """ + + module_class = FlaxBartForCausalLMModule + + def prepare_inputs_for_generation(self, input_ids, max_length, attention_mask: Optional[jnp.DeviceArray] = None): + # initializing the cache + batch_size, seq_length = input_ids.shape + + past_key_values = self.init_cache(batch_size, max_length) + # Note that usually one would have to put 0's in the attention_mask for x > input_ids.shape[-1] and x < cache_length. + # But since the decoder uses a causal mask, those positions are masked anyway. + # Thus, we can create a single static attention_mask here, which is more efficient for compilation + extended_attention_mask = jnp.ones((batch_size, max_length), dtype="i4") + if attention_mask is not None: + position_ids = attention_mask.cumsum(axis=-1) - 1 + extended_attention_mask = lax.dynamic_update_slice(extended_attention_mask, attention_mask, (0, 0)) + else: + position_ids = jnp.broadcast_to(jnp.arange(seq_length, dtype="i4")[None, :], (batch_size, seq_length)) + + return { + "past_key_values": past_key_values, + "attention_mask": extended_attention_mask, + "position_ids": position_ids, + } + + def update_inputs_for_generation(self, model_outputs, model_kwargs): + model_kwargs["past_key_values"] = model_outputs.past_key_values + model_kwargs["position_ids"] = model_kwargs["position_ids"][:, -1:] + 1 + return model_kwargs diff --git a/models/modeling_flax_speech_encoder_decoder.py b/models/modeling_flax_speech_encoder_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..2bc78e7fd3cc5aaea20839c491a23b918ac30eaf --- /dev/null +++ b/models/modeling_flax_speech_encoder_decoder.py @@ -0,0 +1,1245 @@ +# coding=utf-8 +# Copyright 2022 The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" Classes to support Flax Speech-Encoder-Decoder architectures""" + +import os +from functools import partial +from typing import Optional, Tuple, Union, Dict + +import flax +import flax.linen as nn +import jax +import jax.numpy as jnp +from flax.core.frozen_dict import FrozenDict, unfreeze +from jax import lax +from jax.random import PRNGKey +import numpy as np + +from transformers.modeling_flax_outputs import FlaxBaseModelOutput, FlaxCausalLMOutputWithCrossAttentions, FlaxSeq2SeqLMOutput +from transformers.modeling_flax_utils import FlaxPreTrainedModel +from transformers.utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings, ModelOutput +from transformers.generation_flax_utils import FlaxLogitsProcessorList +from models import ( + FlaxWav2Vec2Model, + FlaxWav2Vec2Module, + FlaxBartForCausalLM, + FlaxBartForCausalLMModule, + BartConfig, + Wav2Vec2Config, + SpeechEncoderDecoderConfig, +) + +logger = logging.get_logger(__name__) + +_CONFIG_FOR_DOC = "SpeechEncoderDecoderConfig" + +SPEECH_ENCODER_DECODER_START_DOCSTRING = r""" + This class can be used to initialize a speech-sequence-to-text-sequence model with any pretrained speech + autoencoding model as the encoder and any pretrained text autoregressive model as the decoder. The encoder is + loaded via [`~AutoModel.from_pretrained`] function and the decoder is loaded via + [`~AutoModelForCausalLM.from_pretrained`] function. 
Cross-attention layers are automatically added to the decoder + and should be fine-tuned on a downstream generative task, like summarization. + + The effectiveness of initializing sequence-to-sequence models with pretrained checkpoints for sequence generation + tasks was shown in [Leveraging Pre-trained Checkpoints for Sequence Generation + Tasks](https://arxiv.org/abs/1907.12461) by Sascha Rothe, Shashi Narayan, Aliaksei Severyn. Michael Matena, Yanqi + Zhou, Wei Li, Peter J. Liu. + + Additionally, in [Large-Scale Self- and Semi-Supervised Learning for Speech + Translation](https://arxiv.org/abs/2104.06678) it is shown how leveraging large pretrained speech models for speech + translation yields a significant performance improvement. + + After such an Speech-Encoder Decoder model has been trained/fine-tuned, it can be saved/loaded just like any other + models (see the examples for more information). + + This model inherits from [`FlaxPreTrainedModel`]. Check the superclass documentation for the generic methods the + library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads + etc.) + + This model is also a Flax Linen + [flax.nn.Module](https://flax.readthedocs.io/en/latest/_autosummary/flax.nn.module.html) subclass. Use it as a + regular Flax Module and refer to the Flax documentation for all matter related to general usage and behavior. + + Parameters: + config ([`SpeechEncoderDecoderConfig`]): Model configuration class with all the parameters of the model. + Initializing with a config file does not load the weights associated with the model, only the + configuration. Check out the [`~FlaxPreTrainedModel.from_pretrained`] method to load the model weights. + dtype (`jax.numpy.dtype`, *optional*, defaults to `jax.numpy.float32`): + The data type of the computation. Can be one of `jax.numpy.float32`, `jax.numpy.float16` (on GPUs) and + `jax.numpy.bfloat16` (on TPUs). 
+ + This can be used to enable mixed-precision training or half-precision inference on GPUs or TPUs. If + specified all the computation will be performed with the given `dtype`. + + **Note that this only specifies the dtype of the computation and does not influence the dtype of model + parameters.** + + If you wish to change the dtype of the model parameters, see [`~FlaxPreTrainedModel.to_fp16`] and + [`~FlaxPreTrainedModel.to_bf16`]. +""" + +SPEECH_ENCODER_DECODER_INPUTS_DOCSTRING = r""" + Args: + inputs (`jnp.ndarray` of shape `(batch_size, sequence_length)` or `(batch_size, sequence_length, feature_dim)`, *optional*): + Float values of input raw speech waveform or speech features. Values can be obtained by loading a *.flac* + or *.wav* audio file into an array of type *List[float]* or a *numpy.ndarray*, *e.g.* via the soundfile + library (*pip install soundfile*). To prepare the array into *inputs*, either the [`Wav2Vec2Processor`] or + [`Speech2TextProcessor`] should be used for padding and conversion into a tensor of type + *torch.FloatTensor*. + attention_mask (`jnp.ndarray` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + [What are attention masks?](../glossary#attention-mask) + decoder_input_ids (`jnp.ndarray` of shape `(batch_size, target_sequence_length)`, *optional*): + Indices of decoder input sequence tokens in the vocabulary. + + Indices can be obtained using [`PreTrainedTokenizer`]. See [`PreTrainedTokenizer.encode`] and + [`PreTrainedTokenizer.__call__`] for details. + + [What are input IDs?](../glossary#input-ids) + + If `past_key_values` is used, optionally only the last `decoder_input_ids` have to be input (see + `past_key_values`). + + For sequence to sequence training, `decoder_input_ids` should be provided. 
`decoder_input_ids` should be + created outside of the model by shifting the `labels` to the right, replacing -100 by the `pad_token_id` + and prepending them with the `decoder_start_token_id`. + decoder_attention_mask (`jnp.ndarray` of shape `(batch_size, target_sequence_length)`, *optional*): + Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also + be used by default. + decoder_position_ids (`numpy.ndarray` of shape `(batch_size, sequence_length)`, *optional*): + Indices of positions of each decoder input sequence tokens in the position embeddings. Selected in the + range `[0, config.decoder.max_position_embeddings - 1]`. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for + more detail. + return_dict (`bool`, *optional*): + If set to `True`, the model will return a [`~utils.FlaxSeq2SeqLMOutput`] instead of a plain tuple. +""" + +SPEECH_ENCODER_DECODER_ENCODE_INPUTS_DOCSTRING = r""" + Args: + inputs (`jnp.ndarray` of shape `(batch_size, sequence_length)` or `(batch_size, sequence_length, feature_dim)`, *optional*): + Float values of input raw speech waveform or speech features. Values can be obtained by loading a *.flac* + or *.wav* audio file into an array of type *List[float]* or a *numpy.ndarray*, *e.g.* via the soundfile + library (*pip install soundfile*). To prepare the array into *inputs*, either the [`Wav2Vec2Processor`] or + [`Speech2TextProcessor`] should be used for padding and conversion into a tensor of type + *torch.FloatTensor*. + attention_mask (`jnp.ndarray` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. 
+ + [What are attention masks?](../glossary#attention-mask) + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned + tensors for more detail. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for + more detail. + return_dict (`bool`, *optional*): + If set to `True`, the model will return a [`~utils.FlaxBaseModelOutput`] instead of a plain tuple. +""" + +SPEECH_ENCODER_DECODER_DECODE_INPUTS_DOCSTRING = r""" + Args: + decoder_input_ids (`jnp.ndarray` of shape `(batch_size, target_sequence_length)`, *optional*): + Indices of decoder input sequence tokens in the vocabulary. + + Indices can be obtained using [`PreTrainedTokenizer`]. See [`PreTrainedTokenizer.encode`] and + [`PreTrainedTokenizer.__call__`] for details. + + [What are decoder input IDs?](../glossary#decoder-input-ids) + + If `past_key_values` is used, optionally only the last `decoder_input_ids` have to be input (see + `past_key_values`). + + For sequence to sequence training, `decoder_input_ids` should be provided. `decoder_input_ids` should be + created outside of the model by shifting the `labels` to the right, replacing -100 by the `pad_token_id` + and prepending them with the `decoder_start_token_id`. + encoder_outputs (`tuple(tuple(jnp.ndarray)`): + Tuple consists of (`last_hidden_state`, *optional*: `hidden_states`, *optional*: `attentions`) + `last_hidden_state` of shape `(batch_size, sequence_length, hidden_size)`, *optional*) is a sequence of + hidden-states at the output of the last layer of the encoder. Used in the cross-attention of the decoder. + encoder_attention_mask (`jnp.ndarray` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on padding token indices. 
Mask values selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + [What are attention masks?](../glossary#attention-mask) + decoder_attention_mask (`jnp.ndarray` of shape `(batch_size, target_sequence_length)`, *optional*): + Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also + be used by default. + decoder_position_ids (`numpy.ndarray` of shape `(batch_size, sequence_length)`, *optional*): + Indices of positions of each decoder input sequence tokens in the position embeddings. Selected in the + range `[0, config.decoder.max_position_embeddings - 1]`. + past_key_values (`Dict[str, np.ndarray]`, *optional*, returned by `init_cache` or when passing previous `past_key_values`): + Dictionary of pre-computed hidden-states (key and values in the attention blocks) that can be used for fast + auto-regressive decoding. Pre-computed key and value hidden-states are of shape *[batch_size, max_length]*. + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned + tensors for more detail. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for + more detail. + return_dict (`bool`, *optional*): + If set to `True`, the model will return a [`~utils.FlaxCausalLMOutputWithCrossAttentions`] instead of a + plain tuple. +""" + +@flax.struct.dataclass +class FlaxBeamSearchOutput(ModelOutput): + """ + Flax Base class for outputs of decoder-only generation models using greedy search. + + + Args: + sequences (`jnp.ndarray` of shape `(batch_size, max_length)`): + The generated sequences. + scores (`jnp.ndarray` of shape `(batch_size,)`): + The scores (log probabilites) of the generated sequences. 
+ """ + + sequences: jnp.ndarray = None + scores: jnp.ndarray = None + + +@flax.struct.dataclass +class BeamSearchState: + cur_len: jnp.ndarray + running_sequences: jnp.ndarray + running_scores: jnp.ndarray + sequences: jnp.ndarray + scores: jnp.ndarray + is_sent_finished: jnp.ndarray + model_kwargs: Dict[str, jnp.ndarray] + + + + +class FlaxSpeechEncoderDecoderModule(nn.Module): + config: SpeechEncoderDecoderConfig + dtype: jnp.dtype = jnp.float32 + + def setup(self): + encoder_config = self.config.encoder + decoder_config = self.config.decoder + + # TODO: configure FlaxAutoModel mappings (required when trialling different encoder-decoder combinations) + encoder_module = FlaxWav2Vec2Module + decoder_module = FlaxBartForCausalLMModule + + self.encoder = encoder_module(encoder_config, dtype=self.dtype) + self.decoder = decoder_module(decoder_config, dtype=self.dtype) + + # encoder outputs might need to be projected to different dimension for decoder + if ( + self.encoder.config.hidden_size != self.decoder.config.hidden_size + and self.decoder.config.cross_attention_hidden_size is None + ): + self.enc_to_dec_proj = nn.Dense( + self.decoder.config.hidden_size, + kernel_init=jax.nn.initializers.normal(self.decoder.config.initializer_range), + dtype=self.dtype, + ) + else: + self.enc_to_dec_proj = None + + def _get_feat_extract_output_lengths( + self, input_lengths: Union[jnp.ndarray, int], add_adapter: Optional[bool] = None + ): + """ + Computes the output length of the convolutional layers + """ + + add_adapter = self.config.encoder.add_adapter if add_adapter is None else add_adapter + + def _conv_out_length(input_length, kernel_size, stride): + # 1D convolutional layer output length formula taken + # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html + return (input_length - kernel_size) // stride + 1 + + for kernel_size, stride in zip(self.config.encoder.conv_kernel, self.config.encoder.conv_stride): + input_lengths = _conv_out_length(input_lengths, 
kernel_size, stride) + + if add_adapter: + for _ in range(self.config.encoder.num_adapter_layers): + input_lengths = _conv_out_length(input_lengths, 1, self.config.encoder.adapter_stride) + + return input_lengths + + def _get_encoder_module(self): + return self.encoder + + def _get_projection_module(self): + return self.enc_to_dec_proj + + def _get_decoder_module(self): + return self.decoder + + def __call__( + self, + inputs, + attention_mask, + decoder_input_ids, + decoder_attention_mask, + decoder_position_ids, + encoder_outputs=None, + extract_features=None, + output_attentions: bool = False, + output_hidden_states: bool = False, + output_features: bool = False, + return_dict: bool = True, + deterministic: bool = True, + freeze_feature_encoder: bool = False, + ): + if encoder_outputs is None: + encoder_outputs = self.encoder( + inputs, + attention_mask=attention_mask, + extract_features=extract_features, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + output_features=output_features, + return_dict=return_dict, + deterministic=deterministic, + freeze_feature_encoder=freeze_feature_encoder, + ) + + if output_features: + return encoder_outputs + + encoder_hidden_states = encoder_outputs[0] + + # optionally project encoder_hidden_states + if self.enc_to_dec_proj is not None: + encoder_hidden_states = self.enc_to_dec_proj(encoder_hidden_states) + + # compute correct encoder attention mask + if attention_mask is not None: + encoder_attention_mask = self.encoder._get_feature_vector_attention_mask( + encoder_hidden_states.shape[1], attention_mask + ) + else: + encoder_attention_mask = None + + # flax script modeling_flax_wav2vec2.py + decoder_outputs = self.decoder( + input_ids=decoder_input_ids, + attention_mask=decoder_attention_mask, + position_ids=decoder_position_ids, + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_attention_mask, + output_attentions=output_attentions, + 
output_hidden_states=output_hidden_states, + return_dict=return_dict, + deterministic=deterministic, + ) + + if not return_dict: + return decoder_outputs + encoder_outputs + + return FlaxSeq2SeqLMOutput( + logits=decoder_outputs.logits, + decoder_hidden_states=decoder_outputs.hidden_states, + decoder_attentions=decoder_outputs.attentions, + cross_attentions=decoder_outputs.cross_attentions, + encoder_last_hidden_state=encoder_hidden_states, + encoder_hidden_states=encoder_outputs.hidden_states, + encoder_attentions=encoder_outputs.attentions, + ) + + +@add_start_docstrings(SPEECH_ENCODER_DECODER_START_DOCSTRING) +class FlaxSpeechEncoderDecoderModel(FlaxPreTrainedModel): + r""" + [`FlaxSpeechEncoderDecoderModel`] is a generic model class that will be instantiated as a transformer architecture + with the module (flax.nn.Module) of one of the base model classes of the library as encoder module and another one + as decoder module when created with the :meth*~transformers.FlaxAutoModel.from_pretrained* class method for the + encoder and :meth*~transformers.FlaxAutoModelForCausalLM.from_pretrained* class method for the decoder. + """ + + config_class = SpeechEncoderDecoderConfig + base_model_prefix: str = "speech_encoder_decoder" + module_class = FlaxSpeechEncoderDecoderModule + + def __init__( + self, + config: SpeechEncoderDecoderConfig, + input_shape: Optional[Tuple] = None, + seed: int = 0, + dtype: jnp.dtype = jnp.float32, + _do_init: bool = True, + **kwargs + ): + + if not _do_init: + raise ValueError( + "`FlaxSpeechEncoderDecoderModel` cannot be created without initializing, `_do_init` must be `True`." 
+ ) + + if config.decoder.cross_attention_hidden_size is not None: + # Raise ValueError or option to project enc to dec hidden_size (eg EncAdapterLayer) + if config.decoder.cross_attention_hidden_size != config.encoder.hidden_size: + raise ValueError( + "If `cross_attention_hidden_size` is specified in the decoder's configuration, " + "it has to be equal to the encoder's `hidden_size`. " + f"Got {config.decoder.cross_attention_hidden_size} for `config.decoder.cross_attention_hidden_size` " + f"and {config.encoder.hidden_size} for `config.encoder.hidden_size`." + ) + + # make sure input & output embeddings are not tied + config.tie_word_embeddings = False + module = self.module_class(config=config, dtype=dtype, **kwargs) + + if input_shape is None: + # speech encoders almost always downsample the sequence length dimension + encoder_input_length = 1024 + decoder_input_length = module._get_feat_extract_output_lengths(encoder_input_length) + input_shape = ((1, encoder_input_length), (1, decoder_input_length)) + + super().__init__(config, module, input_shape=input_shape, seed=seed, dtype=dtype, _do_init=_do_init) + + def init_weights(self, rng: jax.random.PRNGKey, input_shape: Tuple) -> FrozenDict: + encoder_input_shape, decoder_input_shape = input_shape + + # init input DeviceArrays + inputs = jnp.zeros(encoder_input_shape, dtype="f4") + attention_mask = jnp.ones_like(inputs, dtype="i4") + decoder_input_ids = jnp.zeros(decoder_input_shape, dtype="i4") + decoder_attention_mask = jnp.ones_like(decoder_input_ids) + + batch_size, sequence_length = inputs.shape + + decoder_batch_size, decoder_sequence_length = decoder_input_ids.shape + if not decoder_batch_size == batch_size: + raise ValueError( + f"The inputs of encoder and decoder should have the same batch size, but got {batch_size} for encoder and {decoder_batch_size} for decoder." 
+ ) + decoder_position_ids = jnp.broadcast_to( + jnp.arange(decoder_sequence_length)[None, :], (decoder_batch_size, decoder_sequence_length) + ) + + params_rng, dropout_rng = jax.random.split(rng) + rngs = {"params": params_rng, "dropout": dropout_rng} + + return self.module.init( + rngs, + inputs, + attention_mask, + decoder_input_ids, + decoder_attention_mask, + decoder_position_ids, + )["params"] + + def init_cache(self, batch_size, max_length, encoder_outputs): + r""" + Args: + batch_size (`int`): + batch_size used for fast auto-regressive decoding. Defines the batch size of the initialized cache. + max_length (`int`): + maximum possible length for auto-regressive decoding. Defines the sequence length of the initialized + cache. + encoder_outputs (`Union[FlaxBaseModelOutput, tuple(tuple(jnp.ndarray)]`): + `encoder_outputs` consists of (`last_hidden_state`, *optional*: `hidden_states`, *optional*: + `attentions`). `last_hidden_state` of shape `(batch_size, sequence_length, hidden_size)`, *optional*) + is a sequence of hidden-states at the output of the last layer of the encoder. Used in the + cross-attention of the decoder. 
+ """ + # init input variables to retrieve cache + decoder_input_ids = jnp.ones((batch_size, max_length), dtype="i4") + decoder_attention_mask = jnp.ones_like(decoder_input_ids) + decoder_position_ids = jnp.broadcast_to( + jnp.arange(jnp.atleast_2d(decoder_input_ids).shape[-1]), decoder_input_ids.shape + ) + + def _decoder_forward(module, decoder_input_ids, decoder_attention_mask, decoder_position_ids, **kwargs): + decoder_module = module._get_decoder_module() + return decoder_module( + input_ids=decoder_input_ids, + attention_mask=decoder_attention_mask, + position_ids=decoder_position_ids, + **kwargs, + ) + + init_variables = self.module.init( + jax.random.PRNGKey(0), + decoder_input_ids=decoder_input_ids, + decoder_attention_mask=decoder_attention_mask, + decoder_position_ids=decoder_position_ids, + encoder_hidden_states=encoder_outputs[0], + init_cache=True, + method=_decoder_forward, # we only need to call the decoder to init the cache + ) + return unfreeze(init_variables["cache"]) + + def _get_feat_extract_output_lengths( + self, input_lengths: Union[jnp.ndarray, int], add_adapter: Optional[bool] = None + ): + return self.module._get_feat_extract_output_lengths(input_lengths, add_adapter=add_adapter) + + @add_start_docstrings(SPEECH_ENCODER_DECODER_ENCODE_INPUTS_DOCSTRING) + @replace_return_docstrings(output_type=FlaxBaseModelOutput, config_class=_CONFIG_FOR_DOC) + def encode( + self, + inputs: jnp.ndarray, + attention_mask: Optional[jnp.ndarray] = None, + extract_features: Optional[jnp.ndarray] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + output_features: Optional[bool] = None, + return_dict: Optional[bool] = None, + train: bool = False, + freeze_feature_encoder: bool = False, + params: dict = None, + dropout_rng: PRNGKey = None, + ): + r""" + Returns: + + Example: + + ```python + >>> from transformers import FlaxSpeechEncoderDecoderModel + + >>> # initialize a wav2vec2-2-bart from pretrained wav2vec2 
and bart models. Note that the cross-attention layers will be randomly initialized + >>> model = FlaxSpeechEncoderDecoderModel.from_encoder_decoder_pretrained( + ... "facebook/wav2vec2-large-lv60", "facebook/bart-large" + ... ) + + >>> inputs = jnp.ones((2, 5000), dtype=jnp.float32) + >>> encoder_outputs = model.encode(inputs) + ```""" + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.return_dict + + if attention_mask is None: + attention_mask = jnp.ones_like(inputs, dtype="i4") + + if extract_features is not None: + extract_features = jnp.array(extract_features, dtype="f4") + + # Handle any PRNG if needed + rngs = {} + if dropout_rng is not None: + rngs["dropout"] = dropout_rng + + def _encoder_forward(module, inputs, attention_mask, **kwargs): + encode_module = module._get_encoder_module() + return encode_module(inputs, attention_mask, **kwargs) + + outputs = self.module.apply( + {"params": params or self.params}, + inputs=jnp.array(inputs, dtype="f4"), + attention_mask=jnp.array(attention_mask, dtype="i4"), + extract_features=extract_features, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + output_features=output_features, + return_dict=return_dict, + deterministic=not train, + freeze_feature_encoder=freeze_feature_encoder, + rngs=rngs, + method=_encoder_forward, + ) + + if return_dict and not output_features: + outputs = FlaxBaseModelOutput( + last_hidden_state=outputs.last_hidden_state, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) + + return outputs + + @add_start_docstrings(SPEECH_ENCODER_DECODER_DECODE_INPUTS_DOCSTRING) + @replace_return_docstrings(output_type=FlaxCausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC) + def 
decode( + self, + decoder_input_ids, + encoder_outputs, + encoder_attention_mask: Optional[jnp.ndarray] = None, + decoder_attention_mask: Optional[jnp.ndarray] = None, + decoder_position_ids: Optional[jnp.ndarray] = None, + past_key_values: dict = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + train: bool = False, + params: dict = None, + dropout_rng: PRNGKey = None, + ): + r""" + Returns: + + Example: + + ```python + >>> from transformers import FlaxSpeechEncoderDecoderModel + >>> import jax.numpy as jnp + + >>> # initialize a wav2vec2-2-bart from pretrained wav2vec2 and bart models. Note that the cross-attention layers will be randomly initialized + >>> model = FlaxSpeechEncoderDecoderModel.from_encoder_decoder_pretrained( + ... "facebook/wav2vec2-large-lv60", "facebook/bart-large" + ... ) + + >>> inputs = jnp.ones((2, 5000), dtype=jnp.float32) + >>> encoder_outputs = model.encode(inputs) + + >>> decoder_start_token_id = model.config.decoder.bos_token_id + >>> decoder_input_ids = jnp.ones((inputs.shape[0], 1), dtype="i4") * decoder_start_token_id + + >>> outputs = model.decode(decoder_input_ids, encoder_outputs) + >>> logits = outputs.logits + ```""" + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.return_dict + + encoder_hidden_states = encoder_outputs[0] + if encoder_attention_mask is None: + batch_size, sequence_length = encoder_hidden_states.shape[:2] + encoder_attention_mask = jnp.ones((batch_size, sequence_length)) + + batch_size, sequence_length = decoder_input_ids.shape + if decoder_attention_mask is None: + decoder_attention_mask = jnp.ones((batch_size, sequence_length)) + + if decoder_position_ids is None: + if 
past_key_values is not None: + raise ValueError("Make sure to provide `decoder_position_ids` when passing `past_key_values`.") + + decoder_position_ids = jnp.broadcast_to( + jnp.arange(sequence_length)[None, :], (batch_size, sequence_length) + ) + + # Handle any PRNG if needed + rngs = {} + if dropout_rng is not None: + rngs["dropout"] = dropout_rng + + params = {"params": params or self.params} + + # if past_key_values are passed then cache is already initialized a private flag init_cache has to be + # passed down to ensure cache is used. It has to be made sure that cache is marked as mutable so that + # it can be changed by FlaxBartAttention module + if past_key_values: + params["cache"] = past_key_values + mutable = ["cache"] + else: + mutable = False + + def _decoder_forward( + module, decoder_input_ids, decoder_attention_mask, decoder_position_ids, encoder_hidden_states, **kwargs + ): + + projection_module = module._get_projection_module() + decoder_module = module._get_decoder_module() + + # optionally project encoder_hidden_states + if projection_module is not None: + encoder_hidden_states = projection_module(encoder_hidden_states) + + return decoder_module( + decoder_input_ids, + decoder_attention_mask, + decoder_position_ids, + encoder_hidden_states, + **kwargs, + ) + + outputs = self.module.apply( + params, + decoder_input_ids=jnp.array(decoder_input_ids, dtype="i4"), + decoder_attention_mask=jnp.array(decoder_attention_mask, dtype="i4"), + decoder_position_ids=jnp.array(decoder_position_ids, dtype="i4"), + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=jnp.array(encoder_attention_mask, dtype="i4"), + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + deterministic=not train, + rngs=rngs, + mutable=mutable, + method=_decoder_forward, + ) + + # add updated cache to model output + if past_key_values is not None and return_dict: + outputs, past = outputs + 
outputs["past_key_values"] = unfreeze(past["cache"]) + return outputs + elif past_key_values is not None and not return_dict: + outputs, past = outputs + outputs = outputs[:1] + (unfreeze(past["cache"]),) + outputs[1:] + + return outputs + + @add_start_docstrings_to_model_forward(SPEECH_ENCODER_DECODER_INPUTS_DOCSTRING) + @replace_return_docstrings(output_type=FlaxSeq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) + def __call__( + self, + inputs: jnp.ndarray, + attention_mask: Optional[jnp.ndarray] = None, + extract_features: Optional[jnp.ndarray] = None, + decoder_input_ids: Optional[jnp.ndarray] = None, + decoder_attention_mask: Optional[jnp.ndarray] = None, + decoder_position_ids: Optional[jnp.ndarray] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + output_features: Optional[bool] = None, + return_dict: Optional[bool] = None, + train: bool = False, + freeze_feature_encoder: bool = False, + params: dict = None, + dropout_rng: PRNGKey = None, + ): + r""" + Returns: + + Examples: + + ```python + >>> from transformers import FlaxSpeechEncoderDecoderModel, BartTokenizer + + >>> # load a fine-tuned wav2vec2-2-bart model + >>> model = FlaxSpeechEncoderDecoderModel.from_pretrained("patrickvonplaten/wav2vec2-2-bart-large") + >>> # load output tokenizer + >>> tokenizer_output = BartTokenizer.from_pretrained("facebook/bart-large") + + >>> inputs = jnp.ones((2, 5000), dtype=jnp.float32) + + >>> # use bart's special bos, pad and eos tokens + >>> model.config.decoder_start_token_id = model.decoder.config.bos_token_id + >>> model.config.pad_token_id = model.decoder.config.pad_token_id + >>> model.config.eos_token_id = model.decoder.config.eos_token_id + + >>> outputs = model.generate(inputs) + # Assert something? More interesting input? dtype correct? 
+ ``` + """ + + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.return_dict + + # prepare encoder inputs + if attention_mask is None: + attention_mask = jnp.ones_like(inputs, dtype="i4") + + if extract_features is not None: + inputs = None # we can omit passing the inputs to the model to save memory + extract_features = jnp.array(extract_features, dtype="f4") + else: + inputs = jnp.array(inputs, dtype="f4") + + # prepare decoder inputs + if decoder_input_ids is None: + raise ValueError( + "`decoder_input_ids` cannot be `None`. For sequence to sequence training, `decoder_position_ids` must be specified as an input argument." + ) + if decoder_attention_mask is None: + decoder_attention_mask = jnp.ones_like(decoder_input_ids) + if decoder_position_ids is None: + batch_size, sequence_length = decoder_input_ids.shape + decoder_position_ids = jnp.broadcast_to( + jnp.arange(sequence_length)[None, :], (batch_size, sequence_length) + ) + + # Handle any PRNG if needed + rngs = {"dropout": dropout_rng} if dropout_rng is not None else {} + + return self.module.apply( + {"params": params or self.params}, + inputs=inputs, + attention_mask=jnp.array(attention_mask, dtype="i4"), + extract_features=extract_features, + decoder_input_ids=jnp.array(decoder_input_ids, dtype="i4"), + decoder_attention_mask=jnp.array(decoder_attention_mask, dtype="i4"), + decoder_position_ids=jnp.array(decoder_position_ids, dtype="i4"), + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + output_features=output_features, + return_dict=return_dict, + deterministic=not train, + freeze_feature_encoder=freeze_feature_encoder, + rngs=rngs, + ) + + def prepare_inputs_for_generation( + self, + decoder_input_ids, + max_length, 
+ attention_mask: Optional[jnp.DeviceArray] = None, + decoder_attention_mask: Optional[jnp.DeviceArray] = None, + encoder_outputs=None, + **kwargs + ): + # initializing the cache + batch_size, seq_length = decoder_input_ids.shape + + past_key_values = self.init_cache(batch_size, max_length, encoder_outputs) + # Note that usually one would have to put 0's in the attention_mask for x > input.shape[-1] and x < cache_length. + # But since the decoder uses a causal mask, those positions are masked anyways. + # Thus we can create a single static attention_mask here, which is more efficient for compilation + extended_attention_mask = jnp.ones((batch_size, max_length), dtype="i4") + if decoder_attention_mask is not None: + decoder_position_ids = decoder_attention_mask.cumsum(axis=-1) - 1 + extended_attention_mask = lax.dynamic_update_slice(extended_attention_mask, decoder_attention_mask, (0, 0)) + else: + decoder_position_ids = jnp.broadcast_to( + jnp.arange(seq_length, dtype="i4")[None, :], (batch_size, seq_length) + ) + + return { + "past_key_values": past_key_values, + "encoder_outputs": encoder_outputs, + "encoder_attention_mask": attention_mask, + "decoder_attention_mask": extended_attention_mask, + "decoder_position_ids": decoder_position_ids, + } + + def update_inputs_for_generation(self, model_outputs, model_kwargs): + model_kwargs["past_key_values"] = model_outputs.past_key_values + model_kwargs["decoder_position_ids"] = model_kwargs["decoder_position_ids"][:, -1:] + 1 + return model_kwargs + + @classmethod + def from_encoder_decoder_pretrained( + cls, + encoder_pretrained_model_name_or_path: Optional[Union[str, os.PathLike]] = None, + decoder_pretrained_model_name_or_path: Optional[Union[str, os.PathLike]] = None, + *model_args, + **kwargs + ) -> FlaxPreTrainedModel: + r""" + Instantiate an encoder and a decoder from one or two base classes of the library from pretrained model + checkpoints. 
+ + Params: + encoder_pretrained_model_name_or_path (`Union[str, os.PathLike]`, *optional*): + Information necessary to initiate the encoder. Can be either: + + - A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co. + Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a + user or organization name, like `dbmdz/bert-base-german-cased`. + - A path to a *directory* containing model weights saved using + [`~FlaxPreTrainedModel.save_pretrained`], e.g., `./my_model_directory/`. + + decoder_pretrained_model_name_or_path (`Union[str, os.PathLike]`, *optional*, defaults to `None`): + Information necessary to initiate the decoder. Can be either: + + - A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co. + Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a + user or organization name, like `dbmdz/bert-base-german-cased`. + - A path to a *directory* containing model weights saved using + [`~FlaxPreTrainedModel.save_pretrained`], e.g., `./my_model_directory/`. + + model_args (remaining positional arguments, *optional*): + All remaning positional arguments will be passed to the underlying model's `__init__` method. + + kwargs (remaining dictionary of keyword arguments, *optional*): + Can be used to update the configuration object (after it being loaded) and initiate the model (e.g., + `output_attentions=True`). + + - To update the encoder configuration, use the prefix *encoder_* for each configuration parameter. + - To update the decoder configuration, use the prefix *decoder_* for each configuration parameter. + - To update the parent model configuration, do not use a prefix for each configuration parameter. + + Behaves differently depending on whether a `config` is provided or automatically loaded. 
+ + Example: + + ```python + >>> from transformers import FlaxSpeechEncoderDecoderModel + + >>> # initialize a wav2vec2-2-bart from pretrained wav2vec2 and bart models. Note that the cross-attention layers will be randomly initialized + >>> model = FlaxSpeechEncoderDecoderModel.from_encoder_decoder_pretrained( + ... "facebook/wav2vec2-large-lv60", "facebook/bart-large" + ... ) + >>> # saving model after fine-tuning + >>> model.save_pretrained("./wav2vec2-2-bart-large") + >>> # load fine-tuned model + >>> model = FlaxSpeechEncoderDecoderModel.from_pretrained("./wav2vec2-2-bart-large") + ```""" + + kwargs_encoder = { + argument[len("encoder_") :]: value for argument, value in kwargs.items() if argument.startswith("encoder_") + } + + kwargs_decoder = { + argument[len("decoder_") :]: value for argument, value in kwargs.items() if argument.startswith("decoder_") + } + + # remove encoder, decoder kwargs from kwargs + for key in kwargs_encoder.keys(): + del kwargs["encoder_" + key] + for key in kwargs_decoder.keys(): + del kwargs["decoder_" + key] + + # Load and initialize the encoder and decoder + # The distinction between encoder and decoder at the model level is made + # by the value of the flag `is_decoder` that we need to set correctly. + encoder = kwargs_encoder.pop("model", None) + if encoder is None: + if encoder_pretrained_model_name_or_path is None: + raise ValueError( + "If `encoder_model` is not defined as an argument, a `encoder_pretrained_model_name_or_path` has " + "to be defined." + ) + + if "config" not in kwargs_encoder: + # TODO: AutoConfig .from_pretrained + encoder_config, kwargs_encoder = Wav2Vec2Config.from_pretrained( + encoder_pretrained_model_name_or_path, **kwargs_encoder, return_unused_kwargs=True + ) + if encoder_config.is_decoder is True or encoder_config.add_cross_attention is True: + logger.info( + f"Initializing {encoder_pretrained_model_name_or_path} as a encoder model " + "from a decoder model. 
Cross-attention and causal mask are disabled.
" + f"In order to initialize {decoder_pretrained_model_name_or_path} as a decoder, " + "make sure that the attributes `is_decoder` and `add_cross_attention` of `decoder_config` " + "passed to `.from_encoder_decoder_pretrained(...)` are set to `True` or do not pass a " + "`decoder_config` to `.from_encoder_decoder_pretrained(...)`" + ) + + # TODO: FlaxAutoModelForCausalLM .from_pretrained + decoder = FlaxBartForCausalLM.from_pretrained(decoder_pretrained_model_name_or_path, **kwargs_decoder) + + # instantiate config with corresponding kwargs + dtype = kwargs.pop("dtype", jnp.float32) + config = SpeechEncoderDecoderConfig.from_encoder_decoder_configs(encoder.config, decoder.config, **kwargs) + + # make sure input & output word embeddings are not tied + config.tie_word_embeddings = False + + # init model + model = cls(config, dtype=dtype) + model.params["encoder"] = encoder.params + model.params["decoder"] = decoder.params + + return model + + def _beam_search( + self, + input_ids: None, + max_length: Optional[int] = None, + pad_token_id: Optional[int] = None, + eos_token_id: Optional[int] = None, + length_penalty: Optional[float] = None, + early_stopping: Optional[bool] = None, + logits_processor: Optional[FlaxLogitsProcessorList] = None, + trace: bool = True, + params: Optional[Dict[str, jnp.ndarray]] = None, + model_kwargs: Optional[Dict[str, jnp.ndarray]] = None, + ): + """ + This beam search function is heavily inspired by Flax's official example: + https://github.com/google/flax/blob/master/examples/wmt/train.py#L254 + """ + + def flatten_beam_dim(tensor): + """Flattens the first two dimensions of a non-scalar array.""" + # ignore scalars (e.g. 
cache index) + if tensor.ndim == 0 or tensor.ndim == 1: + return tensor + elif tensor.ndim == 6: + return tensor.reshape(tensor.shape[:1] + (tensor.shape[1] * tensor.shape[2],) + tensor.shape[3:]) + return tensor.reshape((tensor.shape[0] * tensor.shape[1],) + tensor.shape[2:]) + + def unflatten_beam_dim(tensor, batch_size, num_beams): + """Unflattens the first, flat batch*beam dimension of a non-scalar array.""" + # ignore scalars (e.g. cache index) + if tensor.ndim == 0 or tensor.ndim == 1: + return tensor + if tensor.ndim == 5: + return tensor.reshape(tensor.shape[:1] + (batch_size, num_beams) + tensor.shape[2:]) + return tensor.reshape((batch_size, num_beams) + tensor.shape[1:]) + + def gather_beams(nested, beam_indices, batch_size, new_num_beams): + """ + Gathers the beam slices indexed by beam_indices into new beam array. + """ + batch_indices = jnp.reshape( + jnp.arange(batch_size * new_num_beams) // new_num_beams, (batch_size, new_num_beams) + ) + + def gather_fn(tensor): + # ignore scalars (e.g. cache index) + if tensor.ndim == 0 or tensor.ndim == 1: + return tensor + if tensor.ndim == 6: + return tensor[:, batch_indices, beam_indices] + return tensor[batch_indices, beam_indices] + + return jax.tree_map(gather_fn, nested) + + # init values + max_length = max_length if max_length is not None else self.config.max_length + pad_token_id = pad_token_id if pad_token_id is not None else self.config.pad_token_id + eos_token_id = eos_token_id if eos_token_id is not None else self.config.eos_token_id + length_penalty = length_penalty if length_penalty is not None else self.config.length_penalty + early_stopping = early_stopping if early_stopping is not None else self.config.early_stopping + + batch_size, num_beams, cur_len = input_ids.shape + + eos_token_id = jnp.array(eos_token_id) + pad_token_id = jnp.array(pad_token_id) + cur_len = jnp.array(cur_len) + + # per batch,beam-item holding current token in loop. 
+ sequences = jnp.full((batch_size, num_beams, max_length), pad_token_id, dtype=jnp.int32) + running_sequences = jnp.full((batch_size, num_beams, max_length), pad_token_id, dtype=jnp.int32) + running_sequences = lax.dynamic_update_slice(sequences, input_ids, (0, 0, 0)) + + # per batch,beam-item state bit indicating if sentence has finished. + is_sent_finished = jnp.zeros((batch_size, num_beams), dtype=jnp.bool_) + + # per batch,beam-item score, logprobs + running_scores = jnp.tile(jnp.array([0.0] + [np.array(-1.0e7)] * (num_beams - 1)), [batch_size, 1]) + scores = jnp.ones((batch_size, num_beams)) * np.array(-1.0e7) + + # For Seq2Seq generation, we only need to use the decoder instead of the whole model in generation loop + # and pass it the `encoder_outputs`, which are part of the `model_kwargs`. + model = self.decode if self.config.is_encoder_decoder else self + + # flatten beam dim + if "encoder_outputs" in model_kwargs: + model_kwargs["encoder_outputs"]["last_hidden_state"] = flatten_beam_dim( + model_kwargs["encoder_outputs"]["last_hidden_state"] + ) + if "attention_mask" in model_kwargs: + model_kwargs["attention_mask"] = flatten_beam_dim(model_kwargs["attention_mask"]) + + # initialize model specific kwargs + model_kwargs = self.prepare_inputs_for_generation(flatten_beam_dim(input_ids), max_length, **model_kwargs) + + # initialize state + state = BeamSearchState( + cur_len=cur_len, + running_sequences=running_sequences, + running_scores=running_scores, + sequences=sequences, + scores=scores, + is_sent_finished=is_sent_finished, + model_kwargs=model_kwargs, + ) + + def beam_search_cond_fn(state): + """beam search state termination condition fn.""" + + # 1. is less than max length? + not_max_length_yet = state.cur_len < max_length + + # 2. can the new beams still improve? 
+ best_running_score = state.running_scores[:, -1:] / (max_length**length_penalty) + worst_finished_score = jnp.where( + state.is_sent_finished, jnp.min(state.scores, axis=1, keepdims=True), np.array(-1.0e7) + ) + improvement_still_possible = jnp.all(worst_finished_score < best_running_score) + + # 3. is there still a beam that has not finished? + still_open_beam = ~(jnp.all(state.is_sent_finished) & early_stopping) + + return not_max_length_yet & still_open_beam & improvement_still_possible + + def beam_search_body_fn(state, input_ids_length=1): + """beam search state update fn.""" + # 1. Forward current tokens + # Collect the current position slice along length to feed the fast + # autoregressive decoder model. Flatten the beam dimension into batch + # dimension for feeding into the model. + # unflatten beam dimension + # Unflatten beam dimension in attention cache arrays + input_token = flatten_beam_dim( + lax.dynamic_slice( + state.running_sequences, + (0, 0, state.cur_len - input_ids_length), + (batch_size, num_beams, input_ids_length), + ) + ) + model_outputs = model(input_token, params=params, **state.model_kwargs) + + logits = unflatten_beam_dim(model_outputs.logits[:, -1], batch_size, num_beams) + cache = jax.tree_map( + lambda tensor: unflatten_beam_dim(tensor, batch_size, num_beams), model_outputs.past_key_values + ) + + # adapt logits for FlaxMarianMTModel + logits = self._adapt_logits_for_beam_search(logits) + + # 2. Compute log probs + # get log probabilities from logits, + # process logits with processors (*e.g.* min_length, ...), and + # add new logprobs to existing running logprobs scores. 
+ log_probs = jax.nn.log_softmax(logits) + log_probs = logits_processor( + flatten_beam_dim(running_sequences), flatten_beam_dim(log_probs), state.cur_len + ) + log_probs = unflatten_beam_dim(log_probs, batch_size, num_beams) + log_probs = log_probs + jnp.expand_dims(state.running_scores, axis=2) + vocab_size = log_probs.shape[2] + log_probs = log_probs.reshape((batch_size, num_beams * vocab_size)) + + # 3. Retrieve top-K + # Each item in batch has num_beams * vocab_size candidate sequences. + # For each item, get the top 2*k candidates with the highest log- + # probabilities. We gather the top 2*K beams here so that even if the best + # K sequences reach EOS simultaneously, we have another K sequences + # remaining to continue the live beam search. + # Gather the top 2*K scores from _all_ beams. + # Gather 2*k top beams. + # Recover the beam index by floor division. + # Recover token id by modulo division and expand Id array for broadcasting. + # Update sequences for the 2*K top-k new sequences. + beams_to_keep = 2 * num_beams + topk_log_probs, topk_indices = lax.top_k(log_probs, k=beams_to_keep) + topk_beam_indices = topk_indices // vocab_size + topk_running_sequences = gather_beams( + state.running_sequences, topk_beam_indices, batch_size, beams_to_keep + ) + topk_ids = jnp.expand_dims(topk_indices % vocab_size, axis=2) + topk_sequences = lax.dynamic_update_slice(topk_running_sequences, topk_ids, (0, 0, state.cur_len)) + + # 4. Check which sequences have ended + # Update current sequences: + # Did any of these sequences reach an end marker? + # To prevent these just finished sequences from being added to the current sequences + # set of active beam search sequences, set their log probs to a very large + # negative value. + did_topk_just_finished = topk_sequences[:, :, state.cur_len] == eos_token_id + running_topk_log_probs = topk_log_probs + did_topk_just_finished * np.array(-1.0e7) + # 5. 
Get running sequences scores for next + # Determine the top k beam indices (from top 2*k beams) from log probs + # and gather top k beams (from top 2*k beams). + next_topk_indices = jnp.flip(lax.top_k(running_topk_log_probs, k=num_beams)[1], axis=1) + next_running_sequences, next_running_scores = gather_beams( + [topk_sequences, running_topk_log_probs], next_topk_indices, batch_size, num_beams + ) + + # 6. Process topk logits + # Further process log probs: + # - add length penalty + # - make sure no scores can be added anymore if beam is full + # - make sure still running sequences cannot be chosen as finalized beam + topk_log_probs = topk_log_probs / (state.cur_len**length_penalty) + beams_in_batch_are_full = ( + jnp.broadcast_to(state.is_sent_finished.all(axis=-1, keepdims=True), did_topk_just_finished.shape) + & early_stopping + ) + add_penalty = ~did_topk_just_finished | beams_in_batch_are_full + topk_log_probs += add_penalty * np.array(-1.0e7) + + # 7. Get scores, sequences, is sentence finished for next. + # Combine sequences, scores, and flags along the beam dimension and compare + # new finished sequence scores to existing finished scores and select the + # best from the new set of beams + merged_sequences = jnp.concatenate([state.sequences, topk_sequences], axis=1) + merged_scores = jnp.concatenate([state.scores, topk_log_probs], axis=1) + merged_is_sent_finished = jnp.concatenate([state.is_sent_finished, did_topk_just_finished], axis=1) + topk_merged_indices = jnp.flip(lax.top_k(merged_scores, k=num_beams)[1], axis=1) + next_sequences, next_scores, next_is_sent_finished = gather_beams( + [merged_sequences, merged_scores, merged_is_sent_finished], topk_merged_indices, batch_size, num_beams + ) + + # 8. Update model kwargs. + # Determine the top k beam indices from the original set of all beams. + # With these, gather the top k beam-associated caches. 
+ next_running_indices = gather_beams(topk_beam_indices, next_topk_indices, batch_size, num_beams) + next_cache = gather_beams(cache, next_running_indices, batch_size, num_beams) + model_outputs["past_key_values"] = jax.tree_map(lambda x: flatten_beam_dim(x), next_cache) + next_model_kwargs = self.update_inputs_for_generation(model_outputs, state.model_kwargs) + + return BeamSearchState( + cur_len=state.cur_len + 1, + running_scores=next_running_scores, + running_sequences=next_running_sequences, + scores=next_scores, + sequences=next_sequences, + is_sent_finished=next_is_sent_finished, + model_kwargs=next_model_kwargs, + ) + + # The very first prompt often has sequence length > 1, so run outside of `lax.while_loop` to comply with TPU + if input_ids.shape[-1] > 1: + state = partial(beam_search_body_fn, input_ids_length=input_ids.shape[-1])(state) + + if not trace: + state = self._run_loop_in_debug(beam_search_cond_fn, beam_search_body_fn, state) + else: + state = lax.while_loop(beam_search_cond_fn, beam_search_body_fn, state) + + # Account for the edge-case where there are no finished sequences for a + # particular batch item. If so, return running sequences for that batch item. + none_finished = jnp.any(state.is_sent_finished, axis=1) + sequences = jnp.where(none_finished[:, None, None], state.sequences, state.running_sequences) + scores = jnp.where(none_finished[:, None], state.scores, state.running_scores) + + # return all beams for each batch and the best score + sequences = sequences[:, :] + scores = scores[:, -1] + + return FlaxBeamSearchOutput(sequences=sequences, scores=scores) diff --git a/models/modeling_flax_wav2vec2.py b/models/modeling_flax_wav2vec2.py new file mode 100644 index 0000000000000000000000000000000000000000..8bf1a50af1dc5dce32577b8f8d61806afafde117 --- /dev/null +++ b/models/modeling_flax_wav2vec2.py @@ -0,0 +1,975 @@ +# coding=utf-8 +# Copyright 2021 The Fairseq Authors and the HuggingFace Inc. team. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" Flax Wav2Vec2 model.""" + +from functools import partial +from typing import Optional, Tuple, Union + +import flax +import flax.linen as nn +import jax +import jax.numpy as jnp +from flax.core.frozen_dict import FrozenDict +from flax.linen import partitioning as nn_partitioning +from flax.linen.attention import dot_product_attention_weights +from jax import lax + +from transformers.modeling_flax_outputs import FlaxBaseModelOutput, FlaxCausalLMOutput +from transformers.modeling_flax_utils import ACT2FN, FlaxPreTrainedModel +from transformers.utils import ModelOutput + +from models import Wav2Vec2Config + +scan_with_axes = nn_partitioning.scan_with_axes +remat = nn_partitioning.remat + + +@flax.struct.dataclass +class FlaxWav2Vec2BaseModelOutput(ModelOutput): + """ + Output type of [`FlaxWav2Vec2BaseModelOutput`], with potential hidden states and attentions. + + Args: + last_hidden_state (`jnp.ndarray` of shape `(batch_size, sequence_length, hidden_size)`): + Sequence of hidden-states at the output of the last layer of the model. + extract_features (`jnp.ndarray` of shape `(batch_size, sequence_length, last_conv_dim)`): + Sequence of extracted feature vectors of the last convolutional layer of the model with `last_conv_dim` + being the dimension of the last convolutional layer. 
+ hidden_states (`tuple(jnp.ndarray)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`): + Tuple of `jnp.ndarray` (one for the output of the embeddings + one for the output of each layer) of shape + `(batch_size, sequence_length, hidden_size)`. + + Hidden-states of the model at the output of each layer plus the initial embedding outputs. + attentions (`tuple(jnp.ndarray)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`): + Tuple of `jnp.ndarray` (one for each layer) of shape `(batch_size, num_heads, sequence_length, + sequence_length)`. + + Attentions weights after the attention softmax, used to compute the weighted average in the self-attention + heads. + """ + + last_hidden_state: jnp.ndarray = None + extract_features: jnp.ndarray = None + hidden_states: Optional[Tuple[jnp.ndarray]] = None + attentions: Optional[Tuple[jnp.ndarray]] = None + + +WAV_2_VEC_2_START_DOCSTRING = r""" + Wav2Vec2 was proposed in [wav2vec 2.0: A Framework for Self-Supervised Learning of Speech + Representations](https://arxiv.org/abs/2006.11477) by Alexei Baevski, Henry Zhou, Abdelrahman Mohamed, Michael + Auli. + + This model inherits from [`FlaxPreTrainedModel`]. Check the superclass documentation for the generic methods the + library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads + etc.) + + This model is also a Flax Linen + [flax.nn.Module](https://flax.readthedocs.io/en/latest/_autosummary/flax.nn.module.html) subclass. Use it as a + regular Flax Module and refer to the Flax documentation for all matter related to general usage and behavior. 
+ + Finally, this model supports inherent JAX features such as: + + - [Just-In-Time (JIT) compilation](https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit) + - [Automatic Differentiation](https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation) + - [Vectorization](https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap) + - [Parallelization](https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap) + + Parameters: + config ([`Wav2Vec2Config`]): Model configuration class with all the parameters of the model. + Initializing with a config file does not load the weights associated with the model, only the + configuration. Check out the [`~FlaxPreTrainedModel.from_pretrained`] method to load the model weights. + dtype (`jax.numpy.dtype`, *optional*, defaults to `jax.numpy.float32`): + The data type of the computation. Can be one of `jax.numpy.float32`, `jax.numpy.float16` (on GPUs) and + `jax.numpy.bfloat16` (on TPUs). + + This can be used to enable mixed-precision training or half-precision inference on GPUs or TPUs. If + specified all the computation will be performed with the given `dtype`. + + **Note that this only specifies the dtype of the computation and does not influence the dtype of model + parameters.** + + If you wish to change the dtype of the model parameters, see [`~FlaxPreTrainedModel.to_fp16`] and + [`~FlaxPreTrainedModel.to_bf16`]. +""" + + +WAV_2_VEC_2_INPUTS_DOCSTRING = r""" + Args: + input_values (`jnp.ndarray` of shape `(batch_size, sequence_length)`): + Float values of input raw speech waveform. Values can be obtained by loading a *.flac* or *.wav* audio file + into an array of type *List[float]* or a *numpy.ndarray*, *e.g.* via the soundfile library (*pip install + soundfile*). To prepare the array into *input_values*, the [`Wav2Vec2Processor`] should be used for padding + and conversion into a tensor of type *jnp.ndarray*. See [`Wav2Vec2Processor.__call__`] for details. 
+ attention_mask (`jnp.ndarray` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing convolution and attention on padding token indices. Mask values selected in `[0, + 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + [What are attention masks?](../glossary#attention-mask) .. warning:: `attention_mask` should only be passed + if the corresponding processor has `config.return_attention_mask == True`. For all models whose processor + has `config.return_attention_mask == False`, such as + [wav2vec2-base](https://huggingface.co/facebook/wav2vec2-base-960h), `attention_mask` should **not** be + passed to avoid degraded performance when doing batched inference. For such models `input_values` should + simply be padded with 0 and passed without `attention_mask`. Be aware that these models also yield slightly + different results depending on whether `input_values` is padded or not. + mask_time_indices (`jnp.ndarray` of shape `(batch_size, sequence_length)`, *optional*): + Indices to mask extracted features for contrastive loss. When in training mode, model learns to predict + masked extracted features in *config.proj_codevector_dim* space. + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned + tensors for more detail. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for + more detail. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. 
+""" + + +class FlaxWav2Vec2LayerNormConvLayer(nn.Module): + config: Wav2Vec2Config + layer_id: int = 0 + dtype: jnp.dtype = jnp.float32 + + def setup(self): + self.in_conv_dim = self.config.conv_dim[self.layer_id] if self.layer_id > 0 else 1 + self.out_conv_dim = self.config.conv_dim[self.layer_id] + + self.conv = nn.Conv( + features=self.config.conv_dim[self.layer_id], + kernel_size=(self.config.conv_kernel[self.layer_id],), + strides=(self.config.conv_stride[self.layer_id],), + use_bias=self.config.conv_bias, + kernel_init=jax.nn.initializers.he_normal(), + padding="VALID", + dtype=self.dtype, + ) + self.layer_norm = nn.LayerNorm(epsilon=self.config.layer_norm_eps, dtype=self.dtype) + self.activation = ACT2FN[self.config.feat_extract_activation] + + def __call__(self, hidden_states): + hidden_states = self.conv(hidden_states) + hidden_states = self.layer_norm(hidden_states) + hidden_states = self.activation(hidden_states) + return hidden_states + + +class FlaxConvWithWeightNorm(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + self.conv = nn.Conv( + features=self.config.hidden_size, + kernel_size=(self.config.num_conv_pos_embeddings,), + kernel_init=jax.nn.initializers.he_normal(), + padding="VALID", + feature_group_count=self.config.num_conv_pos_embedding_groups, + dtype=self.dtype, + ) + weight_shape = ( + self.conv.features, + self.conv.features // self.conv.feature_group_count, + self.conv.kernel_size[0], + ) + self.weight_v = self.param("weight_v", jax.nn.initializers.he_normal(), weight_shape) + self.weight_g = self.param("weight_g", lambda _: jnp.linalg.norm(self.weight_v, axis=(0, 1))[None, None, :]) + self.bias = self.param("bias", jax.nn.initializers.zeros, (self.conv.features,)) + self.prev_padding = self.conv.kernel_size[0] // 2 + + def _get_normed_weights(self): + weight_v_norm = jnp.linalg.norm(self.weight_v, axis=(0, 1))[None, None, :] + normed_weight_v = jnp.divide(self.weight_v, weight_v_norm) + 
normed_kernel = jnp.multiply(normed_weight_v, self.weight_g) + return normed_kernel + + def __call__(self, hidden_states): + kernel = self._get_normed_weights() + hidden_states = jnp.pad(hidden_states, ((0, 0), (self.prev_padding, self.prev_padding), (0, 0))) + hidden_states = self.conv.apply({"params": {"kernel": kernel.T, "bias": self.bias}}, hidden_states) + return hidden_states + + +class FlaxWav2Vec2PositionalConvEmbedding(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + self.conv = FlaxConvWithWeightNorm(self.config, dtype=self.dtype) + self.activation = ACT2FN[self.config.feat_extract_activation] + self.num_pad_remove = 1 if self.config.num_conv_pos_embeddings % 2 == 0 else 0 + + def __call__(self, hidden_states): + hidden_states = hidden_states.transpose((0, 1, 2)) + + hidden_states = self.conv(hidden_states) + + if self.num_pad_remove > 0: + hidden_states = hidden_states[:, : -self.num_pad_remove, :] + hidden_states = self.activation(hidden_states) + + hidden_states = hidden_states.transpose((0, 1, 2)) + return hidden_states + + +class FlaxConvLayersCollection(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + if self.config.feat_extract_norm == "layer": + # note that we can't use scan on the conv layers as they differ on a layer-by-layer basis + BlockLayer = remat(FlaxWav2Vec2LayerNormConvLayer) if self.config.gradient_checkpointing else FlaxWav2Vec2LayerNormConvLayer + self.layers = [ + BlockLayer(self.config, layer_id=i, name=str(i), dtype=self.dtype) + for i in range(self.config.num_feat_extract_layers) + ] + elif self.config.feat_extract_norm == "group": + raise NotImplementedError("At the moment only ``config.feat_extact_norm == 'layer'`` is supported") + else: + raise ValueError( + f"`config.feat_extract_norm` is {self.config.feat_extract_norm}, but has to be one of ['group', 'layer']" + ) + + def __call__(self, hidden_states): + for i, conv_layer in 
enumerate(self.layers): + hidden_states = conv_layer(hidden_states) + return hidden_states + + +class FlaxWav2Vec2FeatureEncoder(nn.Module): + """Construct the features from raw audio waveform""" + + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + self.conv_layers = FlaxConvLayersCollection(self.config, dtype=self.dtype) + + def __call__(self, input_values, freeze_feature_encoder=False): + hidden_states = input_values[:, :, None] + hidden_states = self.conv_layers(hidden_states) + if freeze_feature_encoder: + hidden_states = jax.lax.stop_gradient(hidden_states) + return hidden_states + + +class FlaxWav2Vec2FeatureProjection(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + self.layer_norm = nn.LayerNorm(epsilon=self.config.layer_norm_eps, dtype=self.dtype) + self.projection = nn.Dense( + self.config.hidden_size, + kernel_init=jax.nn.initializers.normal(self.config.initializer_range), + dtype=self.dtype, + ) + self.dropout = nn.Dropout(rate=self.config.feat_proj_dropout) + + def __call__(self, hidden_states, deterministic=True): + norm_hidden_states = self.layer_norm(hidden_states) + hidden_states = self.projection(norm_hidden_states) + hidden_states = self.dropout(hidden_states, deterministic=deterministic) + return hidden_states, norm_hidden_states + + +class FlaxWav2Vec2Attention(nn.Module): + config: Wav2Vec2Config + embed_dim: int + num_heads: int + dropout: float = 0.0 + bias: bool = True + dtype: jnp.dtype = jnp.float32 # the dtype of the computation + + def setup(self) -> None: + self.head_dim = self.embed_dim // self.num_heads + if self.head_dim * self.num_heads != self.embed_dim: + raise ValueError( + f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {self.num_heads})." 
+ ) + + dense = partial( + nn.Dense, + self.embed_dim, + use_bias=self.bias, + dtype=self.dtype, + kernel_init=jax.nn.initializers.normal(self.config.initializer_range), + ) + + self.q_proj, self.k_proj, self.v_proj = dense(), dense(), dense() + + self.fused_proj = nn.Dense( + self.embed_dim * 3, + use_bias=self.bias, + dtype=self.dtype, + kernel_init=jax.nn.initializers.normal(self.config.initializer_range), + ) + + self.out_proj = dense() + + self.dropout_layer = nn.Dropout(rate=self.dropout) + + def _split_heads(self, hidden_states): + return hidden_states.reshape(hidden_states.shape[:2] + (self.num_heads, self.head_dim)) + + def _merge_heads(self, hidden_states): + return hidden_states.reshape(hidden_states.shape[:2] + (self.embed_dim,)) + + def __call__( + self, + hidden_states: jnp.ndarray, + key_value_states: Optional[jnp.ndarray] = None, + attention_mask: Optional[jnp.ndarray] = None, + deterministic: bool = True, + ) -> Tuple[jnp.ndarray]: + """Input shape: Batch x Time x Channel""" + + if self.config.fuse_matmuls: + attention_states = self.fused_proj(hidden_states) + query_states, key_states, value_states = jnp.split(attention_states, 3, axis=-1) + + else: + # get query proj + query_states = self.q_proj(hidden_states) + + key_states = self.k_proj(hidden_states) + value_states = self.v_proj(hidden_states) + + query_states = self._split_heads(query_states) + key_states = self._split_heads(key_states) + value_states = self._split_heads(value_states) + + if attention_mask is not None: + attention_mask = jnp.expand_dims(attention_mask, axis=(-3, -2)) + + # Convert the boolean attention mask to an attention bias. 
+ if attention_mask is not None: + # attention mask in the form of attention bias + attention_bias = lax.select( + attention_mask > 0, + jnp.full(attention_mask.shape, 0.0).astype(self.dtype), + jnp.full(attention_mask.shape, float("-inf")).astype(self.dtype), + ) + else: + attention_bias = None + + dropout_rng = None + if not deterministic and self.dropout > 0.0: + dropout_rng = self.make_rng("dropout") + + attn_weights = dot_product_attention_weights( + query_states, + key_states, + bias=attention_bias, + dropout_rng=dropout_rng, + dropout_rate=self.dropout, + broadcast_dropout=True, + deterministic=deterministic, + dtype=self.dtype, + precision=None, + ) + + attn_output = jnp.einsum("...hqk,...khd->...qhd", attn_weights, value_states) + attn_output = self._merge_heads(attn_output) + attn_output = self.out_proj(attn_output) + + return attn_output, attn_weights + + +class FlaxWav2Vec2FeedForward(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + self.intermediate_dropout = nn.Dropout(rate=self.config.activation_dropout) + + self.intermediate_dense = nn.Dense( + self.config.intermediate_size, + kernel_init=jax.nn.initializers.normal(self.config.initializer_range), + dtype=self.dtype, + ) + if isinstance(self.config.hidden_act, str): + self.intermediate_act_fn = ACT2FN[self.config.hidden_act] + else: + self.intermediate_act_fn = self.config.hidden_act + + self.output_dense = nn.Dense( + self.config.hidden_size, + kernel_init=jax.nn.initializers.normal(self.config.initializer_range), + dtype=self.dtype, + ) + self.output_dropout = nn.Dropout(rate=self.config.hidden_dropout) + + def __call__(self, hidden_states, deterministic=True): + hidden_states = self.intermediate_dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + hidden_states = self.intermediate_dropout(hidden_states, deterministic=deterministic) + + hidden_states = self.output_dense(hidden_states) + hidden_states = 
self.output_dropout(hidden_states, deterministic=deterministic) + return hidden_states + + +class FlaxWav2Vec2EncoderLayerStableLayerNorm(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + self.attention = FlaxWav2Vec2Attention( + config=self.config, + embed_dim=self.config.hidden_size, + num_heads=self.config.num_attention_heads, + dropout=self.config.attention_dropout, + dtype=self.dtype, + ) + self.dropout = nn.Dropout(rate=self.config.hidden_dropout) + self.layer_norm = nn.LayerNorm(epsilon=self.config.layer_norm_eps, dtype=self.dtype) + self.feed_forward = FlaxWav2Vec2FeedForward(self.config, dtype=self.dtype) + self.final_layer_norm = nn.LayerNorm(epsilon=self.config.layer_norm_eps, dtype=self.dtype) + + def __call__(self, hidden_states, attention_mask=None, deterministic=True, output_attentions=False): + if self.config.use_scan: + hidden_states = hidden_states[0] + attn_residual = hidden_states + hidden_states = self.layer_norm(hidden_states) + hidden_states, attn_weights = self.attention( + hidden_states, attention_mask=attention_mask, deterministic=deterministic + ) + hidden_states = self.dropout(hidden_states, deterministic=deterministic) + hidden_states = attn_residual + hidden_states + hidden_states = hidden_states + self.feed_forward( + self.final_layer_norm(hidden_states), deterministic=deterministic + ) + + outputs = (hidden_states,) + + if output_attentions: + outputs += (attn_weights,) + + if self.config.use_scan: + outputs = (outputs, None) + + return outputs + + +class FlaxWav2Vec2EncoderLayerStableLayerNormCollection(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + @nn.compact + def __call__( + self, + hidden_states, + attention_mask=None, + deterministic: bool = True, + output_attentions: bool = False, + output_hidden_states: bool = False, + return_dict: bool = True, + ): + all_attentions = () if output_attentions else None + all_hidden_states = () if output_hidden_states else 
None + + num_layers = self.config.num_hidden_layers + BlockEncoderLayer = ( + remat( + FlaxWav2Vec2EncoderLayerStableLayerNorm, + static_argnums=(2, 3), + prevent_cse=not self.config.use_scan, + ) + if self.config.gradient_checkpointing + else FlaxWav2Vec2EncoderLayerStableLayerNorm + ) + + if self.config.use_scan: + # since all decoder layers are the same, we use nn.scan directly + assert not output_attentions, "cannot use `scan` with `output_attentions` set to `True`" + assert not output_hidden_states, "cannot use `scan` with `output_hidden_states` set to `True`" + hidden_states = (hidden_states,) + + hidden_states, _ = scan_with_axes( + BlockEncoderLayer, + variable_axes={"params": 0, "cache": 0}, + split_rngs={"params": True, "dropout": True}, + in_axes=(nn.broadcast, nn.broadcast, nn.broadcast), + length=num_layers, + )(self.config, dtype=self.dtype, name="FlaxWav2Vec2EncoderLayers",)( + hidden_states, attention_mask, deterministic, output_attentions + ) + hidden_states = hidden_states[0] + + else: + for layer in range(num_layers): + if output_hidden_states: + all_hidden_states += (hidden_states,) + + layer_outputs = BlockEncoderLayer( + self.config, + dtype=self.dtype, + name=str(layer), + )(hidden_states, attention_mask, deterministic, output_attentions) + + hidden_states = layer_outputs[0] + + if output_attentions: + all_attentions += (layer_outputs[1],) + + if output_hidden_states: + all_hidden_states += (hidden_states,) + + outputs = (hidden_states, all_hidden_states, all_attentions) + + if not return_dict: + return tuple(v for v in outputs if v is not None) + + return FlaxBaseModelOutput( + last_hidden_state=hidden_states, hidden_states=all_hidden_states, attentions=all_attentions + ) + + +class FlaxWav2Vec2StableLayerNormEncoder(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + self.pos_conv_embed = FlaxWav2Vec2PositionalConvEmbedding(self.config, dtype=self.dtype) + self.layer_norm = 
nn.LayerNorm(epsilon=self.config.layer_norm_eps, dtype=self.dtype) + self.dropout = nn.Dropout(rate=self.config.hidden_dropout) + self.layers = FlaxWav2Vec2EncoderLayerStableLayerNormCollection(self.config, dtype=self.dtype) + + def __call__( + self, + hidden_states, + attention_mask=None, + deterministic=True, + output_attentions=False, + output_hidden_states=False, + return_dict=True, + ): + + if attention_mask is not None: + # make sure padded tokens are not attended to + hidden_states = jnp.where( + jnp.broadcast_to(attention_mask[:, :, None], hidden_states.shape), hidden_states, 0 + ) + + position_embeddings = self.pos_conv_embed(hidden_states) + + hidden_states = hidden_states + position_embeddings + hidden_states = self.dropout(hidden_states, deterministic=deterministic) + + outputs = self.layers( + hidden_states, + attention_mask, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + last_hidden_state = self.layer_norm(outputs[0]) + + # update the last element in `hidden_states` after applying `layernorm` above + hidden_states = None + if output_hidden_states: + hidden_states = outputs[1] + hidden_states = hidden_states[:-1] + (last_hidden_state,) + + if not return_dict: + outputs = (last_hidden_state, hidden_states) + (outputs[2:] if output_hidden_states else outputs[1:]) + return tuple(v for v in outputs if v is not None) + + return FlaxBaseModelOutput( + last_hidden_state=last_hidden_state, hidden_states=hidden_states, attentions=outputs.attentions + ) + + +class FlaxWav2Vec2Adapter(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + # hidden_states require down-projection if feature dims don't match + if self.config.output_hidden_size != self.config.hidden_size: + self.proj = nn.Dense( + self.config.output_hidden_size, + kernel_init=jax.nn.initializers.normal(self.config.initializer_range), + dtype=self.dtype, + ) + self.proj_layer_norm = 
nn.LayerNorm(epsilon=self.config.layer_norm_eps, dtype=self.dtype) + else: + self.proj = self.proj_layer_norm = None + + self.layers = FlaxWav2Vec2AdapterLayersCollection(self.config, dtype=self.dtype) + + def __call__(self, hidden_states, deterministic=True): + # down-project hidden_states if required + if self.proj is not None and self.proj_layer_norm is not None: + hidden_states = self.proj(hidden_states) + hidden_states = self.proj_layer_norm(hidden_states) + + hidden_states = self.layers(hidden_states) + + return hidden_states + + +class FlaxWav2Vec2AdapterLayer(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + self.conv = nn.Conv( + features=2 * self.config.output_hidden_size, + kernel_size=(self.config.adapter_kernel_size,), + strides=(self.config.adapter_stride,), + padding=((1, 1),), + kernel_init=jax.nn.initializers.normal(self.config.initializer_range), + dtype=self.dtype, + ) + + def __call__(self, hidden_states): + hidden_states = self.conv(hidden_states) + hidden_states = nn.glu(hidden_states, axis=2) + + return hidden_states + + +class FlaxWav2Vec2AdapterLayersCollection(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + BlockAdapterLayer = remat(FlaxWav2Vec2AdapterLayer) if self.config.gradient_checkpointing else FlaxWav2Vec2AdapterLayer + self.layers = [ + BlockAdapterLayer(self.config, name=str(i), dtype=self.dtype) + for i in range(self.config.num_adapter_layers) + ] + + def __call__(self, hidden_states): + for conv_layer in self.layers: + hidden_states = conv_layer(hidden_states) + + return hidden_states + + +class FlaxWav2Vec2PreTrainedModel(FlaxPreTrainedModel): + """ + An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained + models. 
+ """ + + config_class = Wav2Vec2Config + base_model_prefix: str = "wav2vec2" + main_input_name = "input_values" + module_class: nn.Module = None + + def __init__( + self, + config: Wav2Vec2Config, + input_shape: Tuple = (1, 1024), + seed: int = 0, + dtype: jnp.dtype = jnp.float32, + _do_init: bool = True, + **kwargs, + ): + module = self.module_class(config=config, dtype=dtype, **kwargs) + super().__init__(config, module, input_shape=input_shape, seed=seed, dtype=dtype, _do_init=_do_init) + + def init_weights(self, rng: jax.random.PRNGKey, input_shape: Tuple) -> FrozenDict: + # init input tensors + input_values = jnp.zeros(input_shape, dtype="i4") + attention_mask = jnp.ones_like(input_values) + params_rng, dropout_rng = jax.random.split(rng, 2) + rngs = {"params": params_rng, "dropout": dropout_rng} + + return self.module.init(rngs, input_values, attention_mask, return_dict=False)["params"] + + def __call__( + self, + input_values, + attention_mask=None, + mask_time_indices=None, + extract_features=None, + params: dict = None, + dropout_rng: jax.random.PRNGKey = None, + train: bool = False, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + output_features: Optional[bool] = None, + freeze_feature_encoder: bool = False, + return_dict: Optional[bool] = None, + ): + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.return_dict + + if attention_mask is None: + batch_size, sequence_length = input_values.shape + attention_mask = jnp.ones((batch_size, sequence_length)) + + if extract_features is not None: + extract_features = jnp.array(extract_features, dtype="f4") + + # Handle any PRNG if needed + rngs = {} + if dropout_rng is not None: + rngs["dropout"] = dropout_rng + + 
inputs = {"params": params or self.params} + + return self.module.apply( + inputs, + jnp.array(input_values, dtype="f4"), + jnp.array(attention_mask, dtype="i4"), + mask_time_indices, + extract_features, + not train, + output_attentions, + output_hidden_states, + output_features, + freeze_feature_encoder, + return_dict, + rngs=rngs, + ) + + def _get_feat_extract_output_lengths( + self, input_lengths: Union[jnp.ndarray, int], add_adapter: Optional[bool] = None + ): + return self.module._get_feat_extract_output_lengths(input_lengths, add_adapter=add_adapter) + + def _get_feature_vector_attention_mask( + self, feature_vector_length: int, attention_mask: jnp.ndarray, add_adapter=None + ): + return self.module._get_feature_vector_attention_mask(feature_vector_length, attention_mask, add_adapter=add_adapter) + + +class FlaxWav2Vec2Module(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + self.feature_extractor = FlaxWav2Vec2FeatureEncoder(self.config, dtype=self.dtype) + self.feature_projection = FlaxWav2Vec2FeatureProjection(self.config, dtype=self.dtype) + self.masked_spec_embed = self.param( + "masked_spec_embed", jax.nn.initializers.uniform(), (self.config.hidden_size,) + ) + + if self.config.do_stable_layer_norm: + self.encoder = FlaxWav2Vec2StableLayerNormEncoder(self.config, dtype=self.dtype) + else: + raise NotImplementedError("``config.do_stable_layer_norm is False`` is currently not supported.") + + self.adapter = FlaxWav2Vec2Adapter(self.config, dtype=self.dtype) if self.config.add_adapter else None + + def __call__( + self, + input_values, + attention_mask=None, + mask_time_indices=None, + extract_features=None, + deterministic=True, + output_attentions=None, + output_hidden_states=None, + output_features=False, + freeze_feature_encoder=False, + return_dict=None, + ): + + # forward pass through the feature extractor if features not specified + if extract_features is None: + extract_features = 
self.feature_extractor(input_values, freeze_feature_encoder=freeze_feature_encoder) + + if output_features: + return extract_features + + # make sure that no loss is computed on padded inputs + if attention_mask is not None: + # compute reduced attention_mask corresponding to feature vectors + attention_mask = self._get_feature_vector_attention_mask( + extract_features.shape[1], attention_mask, add_adapter=False + ) + + hidden_states, extract_features = self.feature_projection(extract_features, deterministic=deterministic) + if mask_time_indices is not None: # apply SpecAugment along time axis with given indices + hidden_states = jnp.where( + jnp.broadcast_to(mask_time_indices[:, :, None], hidden_states.shape), + jnp.broadcast_to(self.masked_spec_embed[None, None, :], hidden_states.shape), + hidden_states, + ) + + encoder_outputs = self.encoder( + hidden_states, + attention_mask=attention_mask, + deterministic=deterministic, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + hidden_states = encoder_outputs[0] + + if self.adapter is not None: + hidden_states = self.adapter(hidden_states) + + if not return_dict: + return (hidden_states, extract_features) + encoder_outputs[1:] + + return FlaxWav2Vec2BaseModelOutput( + last_hidden_state=hidden_states, + extract_features=extract_features, + hidden_states=encoder_outputs.hidden_states, + attentions=encoder_outputs.attentions, + ) + + def _get_feat_extract_output_lengths( + self, input_lengths: Union[jnp.ndarray, int], add_adapter: Optional[bool] = None + ): + """ + Computes the output length of the convolutional layers + """ + + add_adapter = self.config.add_adapter if add_adapter is None else add_adapter + + def _conv_out_length(input_length, kernel_size, stride): + # 1D convolutional layer output length formula taken + # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html + return (input_length - kernel_size) // stride + 1 + + for 
kernel_size, stride in zip(self.config.conv_kernel, self.config.conv_stride): + input_lengths = _conv_out_length(input_lengths, kernel_size, stride) + + if add_adapter: + for _ in range(self.config.num_adapter_layers): + input_lengths = _conv_out_length(input_lengths, 1, self.config.adapter_stride) + + return input_lengths + + def _get_feature_vector_attention_mask( + self, feature_vector_length: int, attention_mask: jnp.ndarray, add_adapter=None + ): + # Effectively attention_mask.sum(-1), but not inplace to be able to run + # on inference mode. + non_padded_lengths = attention_mask.cumsum(axis=-1)[:, -1] + + output_lengths = self._get_feat_extract_output_lengths(non_padded_lengths, add_adapter=add_adapter) + + batch_size = attention_mask.shape[0] + + attention_mask = jnp.zeros((batch_size, feature_vector_length), dtype=attention_mask.dtype) + # these two operations makes sure that all values + # before the output lengths indices are attended to + attention_mask = attention_mask.at[jnp.arange(attention_mask.shape[0]), output_lengths - 1].set(1) + attention_mask = jnp.flip(jnp.flip(attention_mask, -1).cumsum(-1), -1).astype("bool") + return attention_mask + + +class FlaxWav2Vec2Model(FlaxWav2Vec2PreTrainedModel): + module_class = FlaxWav2Vec2Module + + +class FlaxWav2Vec2ForCTCModule(nn.Module): + config: Wav2Vec2Config + dtype: jnp.dtype = jnp.float32 + + def setup(self): + self.wav2vec2 = FlaxWav2Vec2Module(self.config, dtype=self.dtype) + self.dropout = nn.Dropout(rate=self.config.final_dropout) + self.lm_head = nn.Dense( + self.config.vocab_size, + kernel_init=jax.nn.initializers.normal(self.config.initializer_range), + dtype=self.dtype, + ) + + def __call__( + self, + input_values, + attention_mask=None, + mask_time_indices=None, + extract_features=None, + deterministic=True, + output_attentions=None, + output_hidden_states=None, + output_features=False, + freeze_feature_encoder=False, + return_dict=None, + ): + outputs = self.wav2vec2( + input_values, + 
attention_mask=attention_mask, + mask_time_indices=mask_time_indices, + deterministic=deterministic, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + freeze_feature_encoder=freeze_feature_encoder, + return_dict=return_dict, + ) + + hidden_states = outputs[0] + hidden_states = self.dropout(hidden_states, deterministic=deterministic) + + logits = self.lm_head(hidden_states) + + if not return_dict: + return (logits,) + outputs[2:] + + return FlaxCausalLMOutput(logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions) + + def _get_feat_extract_output_lengths( + self, input_lengths: Union[jnp.ndarray, int], add_adapter: Optional[bool] = None + ): + """ + Computes the output length of the convolutional layers + """ + + add_adapter = self.config.add_adapter if add_adapter is None else add_adapter + + def _conv_out_length(input_length, kernel_size, stride): + # 1D convolutional layer output length formula taken + # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html + return (input_length - kernel_size) // stride + 1 + + for kernel_size, stride in zip(self.config.conv_kernel, self.config.conv_stride): + input_lengths = _conv_out_length(input_lengths, kernel_size, stride) + + if add_adapter: + for _ in range(self.config.num_adapter_layers): + input_lengths = _conv_out_length(input_lengths, 1, self.config.adapter_stride) + + return input_lengths + + def _get_feature_vector_attention_mask( + self, feature_vector_length: int, attention_mask: jnp.ndarray, add_adapter=None + ): + # Effectively attention_mask.sum(-1), but not inplace to be able to run + # on inference mode. 
+ non_padded_lengths = attention_mask.cumsum(axis=-1)[:, -1] + + output_lengths = self._get_feat_extract_output_lengths(non_padded_lengths, add_adapter=add_adapter) + + batch_size = attention_mask.shape[0] + + attention_mask = jnp.zeros((batch_size, feature_vector_length), dtype=attention_mask.dtype) + # these two operations makes sure that all values + # before the output lengths indices are attended to + attention_mask = attention_mask.at[jnp.arange(attention_mask.shape[0]), output_lengths - 1].set(1) + attention_mask = jnp.flip(jnp.flip(attention_mask, -1).cumsum(-1), -1).astype("bool") + return attention_mask + + +class FlaxWav2Vec2ForCTC(FlaxWav2Vec2PreTrainedModel): + module_class = FlaxWav2Vec2ForCTCModule diff --git a/preprocessor_config.json b/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run.sh b/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..a4daeba0379dc91cc89224bd2b962066af12bdd3 --- /dev/null +++ b/run.sh @@ -0,0 +1,48 @@ +WANDB_ENTITY=NbAiLab WANDB_PROJECT=wav2vec2 python run_flax_speech_recognition_ctc.py \ + --model_name_or_path="facebook/wav2vec2-xls-r-1b" \ + --hub_model_id="NbAiLab/wav2vec2-1b-npsc-nst" \ + --tokenizer_name="./" \ + --output_dir="./" \ + --overwrite_output_dir \ + --num_train_epochs="40" \ + --per_device_train_batch_size="12" \ + --per_device_eval_batch_size="12" \ + --gradient_accumulation_steps="1" \ + --precision="full_mixed" \ + --matmul_precision="bfloat16" \ + --learning_rate="1e-4" \ + --warmup_steps="4000" \ + --length_column_name="input_length" \ + --evaluation_strategy="steps" \ + --text_column_name="text" \ + 
--save_steps="1000" \ + --eval_steps="1000" \ + --logging_steps="100" \ + --layerdrop="0.041" \ + --attention_dropout="0.094" \ + --activation_dropout="0.055" \ + --hidden_dropout="0.047" \ + --save_total_limit="5" \ + --freeze_feature_encoder \ + --feat_proj_dropout="0.04" \ + --mask_time_prob="0.082" \ + --mask_time_length="10" \ + --mask_feature_prob="0.25" \ + --mask_feature_length="64" \ + --gradient_checkpointing \ + --min_duration_in_seconds="0.5" \ + --max_duration_in_seconds="20.0" \ + --use_auth_token \ + --seed="42" \ + --group_by_length \ + --do_train --do_eval \ + --push_to_hub \ + --preprocessing_num_workers="32" \ + --ctc_zero_infinity \ + --do_lower_case \ + --wandb_project="wav2vec2" \ + --wandb_name="wav2vec2-1b-npsc-nst" \ + --remove_punctuation + + +# --fp16 diff --git a/run_flax_speech_recognition_ctc.py b/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..11df8fb90ea54a20f8f34bbb40442193e151ddc2 --- /dev/null +++ b/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1604 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc[data_args.train_split_name]) / (len(npsc[data_args.train_split_name]) + len(npsc[data_args.eval_split_name])) # Use same train/val ratio as NPSC + nst_train = nst[data_args.train_split_name].train_test_split(train_size=split, seed=seed) + nst[data_args.train_split_name] = nst_train["train"] + nst[data_args.eval_split_name] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + 
npsc_base = npsc.remove_columns([col for col in npsc[data_args.train_split_name].column_names if col not in ["text", "audio"]]) + nst_base = nst.remove_columns([col for col in nst[data_args.train_split_name].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in data_args.train_split_name, data_args.eval_split_name, data_args.test_split_name: + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets[data_args.train_split_name] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets[data_args.eval_split_name] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and 
not training_args.do_predict: + raise ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets[data_args.train_split_name] = raw_datasets[data_args.train_split_name].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets[data_args.eval_split_name] = raw_datasets[data_args.eval_split_name].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + raw_datasets[data_args.test_split_name] = raw_datasets[data_args.test_split_name].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets[data_args.train_split_name] = raw_datasets[data_args.train_split_name].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # 
process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), "sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets[data_args.train_split_name]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[data_args.eval_split_name], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets[data_args.eval_split_name][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets[data_args.train_split_name], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets[data_args.train_split_name][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix=data_args.train_split_name) + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in [data_args.test_split_name]: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + 
eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..7354bfa90aed939d148e5ca7b8f5a2b167a730e3 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,190 @@ +{ + "additional_special_tokens": [ + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + 
"normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + } + ], + "bos_token": "", + "eos_token": "", + "pad_token": "[PAD]", + 
"unk_token": "[UNK]" +} diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4cc035b8c547a4620a2aba67fea665613c851fc5 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,12 @@ +{ + "bos_token": "", + "do_lower_case": false, + "eos_token": "", + "name_or_path": "./", + "pad_token": "[PAD]", + "replace_word_delimiter_char": " ", + "special_tokens_map_file": null, + "tokenizer_class": "Wav2Vec2CTCTokenizer", + "unk_token": "[UNK]", + "word_delimiter_token": "|" +} diff --git a/vocab.json b/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..f862690b7b4f5e3c3553e1808f5ee83577f4ed80 --- /dev/null +++ b/vocab.json @@ -0,0 +1,41 @@ +{ + "(": 1, + ")": 2, + "0": 3, + "3": 4, + "7": 5, + "8": 6, + "9": 7, + "[PAD]": 38, + "[UNK]": 37, + "a": 8, + "b": 9, + "c": 10, + "d": 11, + "e": 12, + "f": 13, + "g": 14, + "h": 15, + "i": 16, + "j": 17, + "k": 18, + "l": 19, + "m": 20, + "n": 21, + "o": 22, + "p": 23, + "q": 24, + "r": 25, + "s": 26, + "t": 27, + "u": 28, + "v": 29, + "w": 30, + "x": 31, + "y": 32, + "z": 33, + "|": 0, + "å": 34, + "æ": 35, + "ø": 36 +} diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log new file mode 120000 index 0000000000000000000000000000000000000000..bd9a1bb653daf580cb0478bab6bed52227518c91 --- /dev/null +++ b/wandb/debug-internal.log @@ -0,0 +1 @@ +run-20220730_174606-j2u4n7h4/logs/debug-internal.log \ No newline at end of file diff --git a/wandb/debug.log b/wandb/debug.log new file mode 120000 index 0000000000000000000000000000000000000000..f9b9788e845bd50b9d1fd6e4856cebc1c21fd3cf --- /dev/null +++ b/wandb/debug.log @@ -0,0 +1 @@ +run-20220730_174606-j2u4n7h4/logs/debug.log \ No newline at end of file diff --git a/wandb/latest-run b/wandb/latest-run new file mode 120000 index 0000000000000000000000000000000000000000..bf90ee7bb24b3a8fe5d6ffdb572406b591f10e6f --- /dev/null +++ b/wandb/latest-run @@ -0,0 +1 @@ 
+run-20220730_174606-j2u4n7h4 \ No newline at end of file diff --git a/wandb/run-20220729_183213-356uc50u/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220729_183213-356uc50u/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..934c1109c5f92a96e47636edcd49612a996dd9fe --- /dev/null +++ b/wandb/run-20220729_183213-356uc50u/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1596 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + return False # 
Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map(map_npsc).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", "audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", 
"test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise ValueError( 
+ "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_183213-356uc50u/files/config.yaml b/wandb/run-20220729_183213-356uc50u/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f2e94752f5388b27318ca481df18004064ee224e --- /dev/null +++ b/wandb/run-20220729_183213-356uc50u/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659119533 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_183213-356uc50u/files/output.log b/wandb/run-20220729_183213-356uc50u/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..13fed222a38a47168df9365eeae2cb464dba0718 --- /dev/null +++ b/wandb/run-20220729_183213-356uc50u/files/output.log @@ -0,0 +1,253 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=500, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_18-32-09_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=32, +per_device_train_batch_size=32, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=500, +save_strategy=steps, 
+save_total_limit=3, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +Downloading and preparing dataset nst/no-close to /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53... +Downloading builder script: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13.1k/13.1k [00:00<00:00, 154kB/s] +Downloading data files: 0%| | 0/9 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 870, in main + raw_datasets = make_dataset(seed=training_args.seed) + File "run_flax_speech_recognition_ctc.py", line 802, in make_dataset + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + File "/data/flax/lib/python3.8/site-packages/datasets/load.py", line 1746, in load_dataset + builder_instance.download_and_prepare( + File "/data/flax/lib/python3.8/site-packages/datasets/builder.py", line 704, in download_and_prepare + self._download_and_prepare( + File "/data/flax/lib/python3.8/site-packages/datasets/builder.py", line 1227, in _download_and_prepare + super()._download_and_prepare(dl_manager, verify_infos, check_duplicate_keys=verify_infos) + File "/data/flax/lib/python3.8/site-packages/datasets/builder.py", line 793, in _download_and_prepare + self._prepare_split(split_generator, **prepare_split_kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/builder.py", line 1218, in _prepare_split + example = self.info.features.encode_example(record) + File "/data/flax/lib/python3.8/site-packages/datasets/features/features.py", line 1614, in 
encode_example + return encode_nested_example(self, example) + File "/data/flax/lib/python3.8/site-packages/datasets/features/features.py", line 1165, in encode_nested_example + { + File "/data/flax/lib/python3.8/site-packages/datasets/features/features.py", line 1166, in + k: encode_nested_example(sub_schema, sub_obj, level=level + 1) + File "/data/flax/lib/python3.8/site-packages/datasets/features/features.py", line 1220, in encode_nested_example + return schema.encode_example(obj) if obj is not None else None + File "/data/flax/lib/python3.8/site-packages/datasets/features/audio.py", line 86, in encode_example + raise ImportError("To support encoding audio data, please install 'soundfile'.") from err +ImportError: To support encoding audio data, please install 'soundfile'. \ No newline at end of file diff --git a/wandb/run-20220729_183213-356uc50u/files/requirements.txt b/wandb/run-20220729_183213-356uc50u/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..aa5033f65271e17cdb6ed0e1630a231dcd521c75 --- /dev/null +++ b/wandb/run-20220729_183213-356uc50u/files/requirements.txt @@ -0,0 +1,137 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 
+grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +libtpu-nightly==0.1.dev20220722 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pygments==2.11.1 +pyparsing==3.0.6 +python-dateutil==2.8.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.11.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220729_183213-356uc50u/files/wandb-metadata.json b/wandb/run-20220729_183213-356uc50u/files/wandb-metadata.json new file mode 100644 index 
0000000000000000000000000000000000000000..5ebb43cc9f3ec4f7944916acb29df29b05628c98 --- /dev/null +++ b/wandb/run-20220729_183213-356uc50u/files/wandb-metadata.json @@ -0,0 +1,67 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-29T18:32:17.029179", + "startedAt": "2022-07-29T18:32:13.606321", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=32", + "--per_device_eval_batch_size=32", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=500", + "--eval_steps=500", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=3", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project", + "wav2vec2", + "--wandb_name", + "wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": 
"t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220729_183213-356uc50u/files/wandb-summary.json b/wandb/run-20220729_183213-356uc50u/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..05d703cd005ebc6bef1c14b739a96224e37fa421 --- /dev/null +++ b/wandb/run-20220729_183213-356uc50u/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 256}} \ No newline at end of file diff --git a/wandb/run-20220729_183213-356uc50u/logs/debug-internal.log b/wandb/run-20220729_183213-356uc50u/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..07b19490a0e387933a9d2c98db18dbc39afc9d79 --- /dev/null +++ b/wandb/run-20220729_183213-356uc50u/logs/debug-internal.log @@ -0,0 +1,301 @@ +2022-07-29 18:32:14,486 INFO MainThread:136862 [internal.py:wandb_internal():87] W&B internal server running at pid: 136862, started at: 2022-07-29 18:32:14.486632 +2022-07-29 18:32:14,488 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: check_version +2022-07-29 18:32:14,489 INFO WriterThread:136862 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/run-356uc50u.wandb +2022-07-29 18:32:14,489 DEBUG SenderThread:136862 [sender.py:send():234] send: header +2022-07-29 18:32:14,490 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: check_version +2022-07-29 18:32:14,527 DEBUG SenderThread:136862 [sender.py:send():234] send: run +2022-07-29 18:32:14,729 INFO SenderThread:136862 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files +2022-07-29 18:32:14,729 INFO SenderThread:136862 [sender.py:_start_run_threads():804] run started: 356uc50u with start time 1659119533 +2022-07-29 18:32:14,729 DEBUG SenderThread:136862 [sender.py:send():234] send: summary +2022-07-29 18:32:14,729 INFO 
SenderThread:136862 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 18:32:14,730 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: run_start +2022-07-29 18:32:15,737 INFO Thread-8 :136862 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/wandb-summary.json +2022-07-29 18:32:17,028 DEBUG HandlerThread:136862 [meta.py:__init__():40] meta init +2022-07-29 18:32:17,029 DEBUG HandlerThread:136862 [meta.py:__init__():54] meta init done +2022-07-29 18:32:17,029 DEBUG HandlerThread:136862 [meta.py:probe():214] probe +2022-07-29 18:32:17,030 DEBUG HandlerThread:136862 [meta.py:_setup_git():204] setup git +2022-07-29 18:32:17,062 DEBUG HandlerThread:136862 [meta.py:_setup_git():211] setup git done +2022-07-29 18:32:17,062 DEBUG HandlerThread:136862 [meta.py:_save_code():92] save code +2022-07-29 18:32:17,074 DEBUG HandlerThread:136862 [meta.py:_save_code():113] save code done +2022-07-29 18:32:17,074 DEBUG HandlerThread:136862 [meta.py:_save_patches():130] save patches +2022-07-29 18:32:17,166 DEBUG HandlerThread:136862 [meta.py:_save_patches():172] save patches done +2022-07-29 18:32:17,166 DEBUG HandlerThread:136862 [meta.py:_save_pip():58] save pip +2022-07-29 18:32:17,166 DEBUG HandlerThread:136862 [meta.py:_save_pip():72] save pip done +2022-07-29 18:32:17,166 DEBUG HandlerThread:136862 [meta.py:probe():252] probe done +2022-07-29 18:32:17,193 DEBUG SenderThread:136862 [sender.py:send():234] send: files +2022-07-29 18:32:17,193 INFO SenderThread:136862 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-29 18:32:17,193 INFO SenderThread:136862 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-29 18:32:17,199 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:32:17,199 DEBUG 
SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:32:17,736 INFO Thread-8 :136862 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/code/run_flax_speech_recognition_ctc.py +2022-07-29 18:32:17,737 INFO Thread-8 :136862 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/requirements.txt +2022-07-29 18:32:17,737 INFO Thread-8 :136862 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:17,737 INFO Thread-8 :136862 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/wandb-metadata.json +2022-07-29 18:32:17,737 INFO Thread-8 :136862 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/code +2022-07-29 18:32:17,880 INFO Thread-12 :136862 [upload_job.py:push():137] Uploaded file /tmp/tmp1arbfimxwandb/oqv2t90y-code/run_flax_speech_recognition_ctc.py +2022-07-29 18:32:18,151 INFO Thread-11 :136862 [upload_job.py:push():137] Uploaded file /tmp/tmp1arbfimxwandb/1hi0yjav-wandb-metadata.json +2022-07-29 18:32:19,737 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:21,738 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:23,739 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:25,740 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:27,741 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:29,742 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:31,743 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:32,337 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:32:32,338 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:32:33,744 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:35,745 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:37,746 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:39,747 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:41,748 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:43,749 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:45,108 
DEBUG SenderThread:136862 [sender.py:send():234] send: stats +2022-07-29 18:32:45,750 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:47,497 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:32:47,497 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:32:47,751 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:49,752 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:51,753 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:53,753 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:55,754 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:57,755 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:32:59,756 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:01,757 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:02,633 DEBUG 
HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:33:02,633 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:33:03,758 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:05,759 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:07,760 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:09,761 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:11,761 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:13,762 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:15,183 DEBUG SenderThread:136862 [sender.py:send():234] send: stats +2022-07-29 18:33:15,763 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:17,764 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:17,772 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:33:17,772 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 
18:33:19,765 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:21,766 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:23,767 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:25,768 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:27,769 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:29,770 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:31,771 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:32,909 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:33:32,909 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:33:33,772 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:35,773 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:37,774 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:39,775 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:41,776 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:43,777 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:45,245 DEBUG SenderThread:136862 [sender.py:send():234] send: stats +2022-07-29 18:33:45,778 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:47,779 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:48,051 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:33:48,051 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:33:49,780 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:51,781 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:53,782 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:55,783 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:57,784 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:33:59,785 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:01,786 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:03,192 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:34:03,192 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:34:03,786 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:05,787 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:07,788 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:09,789 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:11,790 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:13,791 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:15,308 
DEBUG SenderThread:136862 [sender.py:send():234] send: stats +2022-07-29 18:34:15,792 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:17,793 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:18,334 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:34:18,334 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:34:19,794 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:21,795 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:23,796 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:25,797 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:27,798 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:29,799 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:31,800 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:33,472 DEBUG 
HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:34:33,472 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:34:33,801 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:35,802 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:37,803 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:39,804 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:41,805 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:43,806 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:45,381 DEBUG SenderThread:136862 [sender.py:send():234] send: stats +2022-07-29 18:34:45,807 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:47,808 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:48,609 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:34:48,610 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 
18:34:49,809 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:51,810 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:53,811 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:55,812 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:57,813 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:34:59,814 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:01,815 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:03,748 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:35:03,748 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:35:03,815 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:05,816 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:07,817 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:09,818 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:11,819 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:13,820 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:15,454 DEBUG SenderThread:136862 [sender.py:send():234] send: stats +2022-07-29 18:35:15,821 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:17,822 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:18,886 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:35:18,886 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:35:19,823 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:33,829 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:34,020 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:35:34,021 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:35:35,830 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:37,831 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:39,831 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:41,832 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:43,833 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:45,525 DEBUG SenderThread:136862 [sender.py:send():234] send: stats +2022-07-29 18:35:45,834 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:47,836 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:49,158 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:35:49,158 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:35:49,837 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:51,838 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:53,839 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:55,840 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:57,841 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:35:59,842 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:01,843 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:03,844 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:04,296 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:36:04,296 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:36:05,845 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:07,846 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:09,846 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:11,847 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 
18:36:13,848 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:15,598 DEBUG SenderThread:136862 [sender.py:send():234] send: stats +2022-07-29 18:36:15,849 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:17,850 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:19,431 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:36:19,431 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:36:19,851 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:23,853 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:29,855 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:30,795 DEBUG SenderThread:136862 [sender.py:send():234] send: telemetry +2022-07-29 18:36:30,795 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:36:30,795 DEBUG SenderThread:136862 [sender.py:send():234] send: exit +2022-07-29 18:36:30,795 INFO SenderThread:136862 [sender.py:send_exit():366] handling exit code: 1 +2022-07-29 18:36:30,796 INFO SenderThread:136862 [sender.py:send_exit():368] handling runtime: 256 +2022-07-29 18:36:30,798 INFO SenderThread:136862 [sender.py:_save_file():939] saving file 
wandb-summary.json with policy end +2022-07-29 18:36:30,798 INFO SenderThread:136862 [sender.py:send_exit():374] send defer +2022-07-29 18:36:30,798 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:36:30,799 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:36:30,799 INFO HandlerThread:136862 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-29 18:36:30,799 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: defer +2022-07-29 18:36:30,799 INFO SenderThread:136862 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-29 18:36:30,799 INFO SenderThread:136862 [sender.py:transition_state():387] send defer: 1 +2022-07-29 18:36:30,800 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:36:30,800 INFO HandlerThread:136862 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-29 18:36:30,830 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: defer +2022-07-29 18:36:30,830 INFO SenderThread:136862 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-29 18:36:30,830 INFO SenderThread:136862 [sender.py:transition_state():387] send defer: 2 +2022-07-29 18:36:30,831 DEBUG SenderThread:136862 [sender.py:send():234] send: stats +2022-07-29 18:36:30,831 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:36:30,831 INFO HandlerThread:136862 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-29 18:36:30,831 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: defer +2022-07-29 18:36:30,831 INFO SenderThread:136862 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-29 18:36:30,831 INFO SenderThread:136862 [sender.py:transition_state():387] send defer: 3 +2022-07-29 18:36:30,832 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: 
defer +2022-07-29 18:36:30,832 INFO HandlerThread:136862 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-29 18:36:30,832 DEBUG SenderThread:136862 [sender.py:send():234] send: summary +2022-07-29 18:36:30,832 INFO SenderThread:136862 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 18:36:30,832 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: defer +2022-07-29 18:36:30,832 INFO SenderThread:136862 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-29 18:36:30,832 INFO SenderThread:136862 [sender.py:transition_state():387] send defer: 4 +2022-07-29 18:36:30,832 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:36:30,833 INFO HandlerThread:136862 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-29 18:36:30,833 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: defer +2022-07-29 18:36:30,833 INFO SenderThread:136862 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-29 18:36:30,856 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/wandb-summary.json +2022-07-29 18:36:30,856 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:30,900 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:36:31,177 INFO SenderThread:136862 [sender.py:transition_state():387] send defer: 5 +2022-07-29 18:36:31,177 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:36:31,178 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:36:31,178 INFO HandlerThread:136862 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-29 
18:36:31,178 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: defer +2022-07-29 18:36:31,178 INFO SenderThread:136862 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-29 18:36:31,178 INFO SenderThread:136862 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-29 18:36:31,279 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:36:31,856 INFO Thread-8 :136862 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/config.yaml +2022-07-29 18:36:31,857 INFO SenderThread:136862 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files +2022-07-29 18:36:31,857 INFO SenderThread:136862 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/config.yaml config.yaml +2022-07-29 18:36:31,857 INFO SenderThread:136862 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/requirements.txt requirements.txt +2022-07-29 18:36:31,857 INFO SenderThread:136862 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log output.log +2022-07-29 18:36:31,858 INFO SenderThread:136862 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/wandb-summary.json wandb-summary.json +2022-07-29 18:36:31,858 INFO SenderThread:136862 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/wandb-metadata.json wandb-metadata.json +2022-07-29 18:36:31,863 INFO SenderThread:136862 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-29 18:36:31,863 INFO 
SenderThread:136862 [sender.py:transition_state():387] send defer: 6 +2022-07-29 18:36:31,864 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:36:31,870 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:36:31,870 INFO HandlerThread:136862 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-29 18:36:31,870 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: defer +2022-07-29 18:36:31,870 INFO SenderThread:136862 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-29 18:36:31,870 INFO SenderThread:136862 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 18:36:31,965 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:36:31,965 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:36:32,067 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:36:32,067 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:36:32,169 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:36:32,169 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:36:32,270 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:36:32,271 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:36:32,332 INFO Thread-13 :136862 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/config.yaml +2022-07-29 18:36:32,338 INFO Thread-16 :136862 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/wandb-summary.json +2022-07-29 18:36:32,340 INFO Thread-14 :136862 [upload_job.py:push():137] 
Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/requirements.txt +2022-07-29 18:36:32,348 INFO Thread-15 :136862 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/files/output.log +2022-07-29 18:36:32,372 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:36:32,372 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:36:32,473 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:36:32,474 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:36:32,548 INFO Thread-7 :136862 [sender.py:transition_state():387] send defer: 7 +2022-07-29 18:36:32,549 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:36:32,549 INFO HandlerThread:136862 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-29 18:36:32,549 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: defer +2022-07-29 18:36:32,549 INFO SenderThread:136862 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-29 18:36:32,575 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:36:33,221 INFO SenderThread:136862 [sender.py:transition_state():387] send defer: 8 +2022-07-29 18:36:33,221 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:36:33,222 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:36:33,222 INFO HandlerThread:136862 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-29 18:36:33,222 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: defer +2022-07-29 18:36:33,222 INFO SenderThread:136862 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-29 18:36:33,222 INFO 
SenderThread:136862 [sender.py:transition_state():387] send defer: 9 +2022-07-29 18:36:33,223 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:36:33,223 INFO HandlerThread:136862 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-29 18:36:33,223 DEBUG SenderThread:136862 [sender.py:send():234] send: final +2022-07-29 18:36:33,223 DEBUG SenderThread:136862 [sender.py:send():234] send: footer +2022-07-29 18:36:33,224 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: defer +2022-07-29 18:36:33,224 INFO SenderThread:136862 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-29 18:36:33,323 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:36:33,323 DEBUG SenderThread:136862 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:36:33,323 INFO SenderThread:136862 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 18:36:33,628 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: get_summary +2022-07-29 18:36:33,629 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-29 18:36:33,630 DEBUG HandlerThread:136862 [handler.py:handle_request():130] handle_request: shutdown +2022-07-29 18:36:33,630 INFO HandlerThread:136862 [handler.py:finish():731] shutting down handler +2022-07-29 18:36:34,224 INFO WriterThread:136862 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/run-356uc50u.wandb +2022-07-29 18:36:34,627 INFO SenderThread:136862 [sender.py:finish():1070] shutting down sender +2022-07-29 18:36:34,627 INFO SenderThread:136862 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 18:36:34,627 INFO SenderThread:136862 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 18:36:34,630 INFO MainThread:136862 [internal.py:handle_exit():77] Internal process 
exited diff --git a/wandb/run-20220729_183213-356uc50u/logs/debug.log b/wandb/run-20220729_183213-356uc50u/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..f369ca3d2266e28987e9d86c9bf86bc0de963656 --- /dev/null +++ b/wandb/run-20220729_183213-356uc50u/logs/debug.log @@ -0,0 +1,130 @@ +2022-07-29 18:32:13,607 INFO MainThread:135604 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-29 18:32:13,608 INFO MainThread:135604 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-29 18:32:13,608 INFO MainThread:135604 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/logs/debug.log +2022-07-29 18:32:13,608 INFO MainThread:135604 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_183213-356uc50u/logs/debug-internal.log +2022-07-29 18:32:13,608 INFO MainThread:135604 [wandb_init.py:init():404] calling init triggers +2022-07-29 18:32:13,608 INFO MainThread:135604 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-29 18:32:13,608 INFO MainThread:135604 [wandb_init.py:init():460] starting backend +2022-07-29 18:32:13,608 INFO MainThread:135604 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-29 18:32:13,667 INFO MainThread:135604 [backend.py:ensure_launched():216] starting backend process... 
+2022-07-29 18:32:13,694 INFO MainThread:135604 [backend.py:ensure_launched():221] started backend process with pid: 136862 +2022-07-29 18:32:13,698 INFO MainThread:135604 [wandb_init.py:init():469] backend started and connected +2022-07-29 18:32:13,713 INFO MainThread:135604 [wandb_init.py:init():533] updated telemetry +2022-07-29 18:32:13,778 INFO MainThread:135604 [wandb_init.py:init():563] communicating current version +2022-07-29 18:32:14,526 INFO MainThread:135604 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-29 18:32:14,526 INFO MainThread:135604 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-29 18:32:14,730 INFO MainThread:135604 [wandb_init.py:init():606] starting run threads in backend +2022-07-29 18:32:17,197 INFO MainThread:135604 [wandb_run.py:_console_start():1810] atexit reg +2022-07-29 18:32:17,197 INFO MainThread:135604 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-29 18:32:17,198 INFO MainThread:135604 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-29 18:32:17,204 INFO MainThread:135604 [wandb_run.py:_redirect():1745] Redirects installed. 
+2022-07-29 18:32:17,204 INFO MainThread:135604 [wandb_init.py:init():633] run started, returning control to user process +2022-07-29 18:36:28,486 INFO MainThread:135604 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-29 18:36:28,502 INFO MainThread:135604 [wandb_run.py:_restore():1752] restore +2022-07-29 18:36:30,799 INFO MainThread:135604 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73662 + total_bytes: 73662 +} + +2022-07-29 18:36:31,178 INFO MainThread:135604 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73662 + total_bytes: 73662 +} + +2022-07-29 18:36:31,864 INFO MainThread:135604 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73662 + total_bytes: 98442 +} + +2022-07-29 18:36:31,966 INFO MainThread:135604 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73662 + total_bytes: 98442 +} + +2022-07-29 18:36:32,068 INFO MainThread:135604 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 98442 + total_bytes: 98442 +} + +2022-07-29 18:36:32,170 INFO MainThread:135604 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 98442 + total_bytes: 98442 +} + +2022-07-29 18:36:32,271 INFO MainThread:135604 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 98442 + total_bytes: 98442 +} + +2022-07-29 18:36:32,373 INFO MainThread:135604 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} 
+pusher_stats { + uploaded_bytes: 98442 + total_bytes: 98442 +} + +2022-07-29 18:36:32,474 INFO MainThread:135604 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 98442 + total_bytes: 98442 +} + +2022-07-29 18:36:33,222 INFO MainThread:135604 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 98442 + total_bytes: 98442 +} + +2022-07-29 18:36:33,627 INFO MainThread:135604 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 98442 + total_bytes: 98442 +} +local_info { +} + +2022-07-29 18:36:35,126 INFO MainThread:135604 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220729_183213-356uc50u/run-356uc50u.wandb b/wandb/run-20220729_183213-356uc50u/run-356uc50u.wandb new file mode 100644 index 0000000000000000000000000000000000000000..3c662a838aedae5fdf972eb40e73fe0930af9ff6 --- /dev/null +++ b/wandb/run-20220729_183213-356uc50u/run-356uc50u.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abb83d82b3b2c65b07ff90b0a90d06625b3524560ee7653e99485761b1a56795 +size 73924 diff --git a/wandb/run-20220729_184558-17ksemgv/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220729_184558-17ksemgv/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..934c1109c5f92a96e47636edcd49612a996dd9fe --- /dev/null +++ b/wandb/run-20220729_184558-17ksemgv/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1596 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. + +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. 
+check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. + """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." 
+ }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." + }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. 
Should be one of 'mean' or 'sum'."}
+    )
+    ctc_zero_infinity: Optional[bool] = field(
+        default=False, metadata={"help": "If True, will try to avoid the CTC loss going to infinity."}
+    )
+
+
+@flax.struct.dataclass
+class DataTrainingArguments:
+    """
+    Arguments pertaining to what data we are going to input our model for training and eval.
+    """
+
+    dataset_name: str = field(
+        default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."}
+    )
+    dataset_config_name: Optional[str] = field(
+        default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
+    )
+    text_column: Optional[str] = field(
+        default=None,
+        metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."},
+    )
+    dataset_cache_dir: Optional[str] = field(
+        default=None, metadata={"help": "Path to cache directory for saving and loading datasets"}
+    )
+    overwrite_cache: bool = field(
+        default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
+    )
+    preprocessing_num_workers: Optional[int] = field(
+        default=None,
+        metadata={"help": "The number of processes to use for the preprocessing."},
+    )
+    max_train_samples: Optional[int] = field(
+        default=None,
+        metadata={
+            "help": "For debugging purposes or quicker training, truncate the number of training examples to this "
+            "value if set."
+        },
+    )
+    max_eval_samples: Optional[int] = field(
+        default=None,
+        metadata={
+            "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
+            "value if set."
+        },
+    )
+    max_test_samples: Optional[int] = field(
+        default=None,
+        metadata={
+            "help": "For debugging purposes or quicker training, truncate the number of test examples to this "
+            "value if set."
+        },
+    )
+    audio_column_name: str = field(
+        default="audio",
+        metadata={"help": "The name of the dataset column containing the audio data. 
Defaults to 'audio'"},
+    )
+    text_column_name: str = field(
+        default="text",
+        metadata={"help": "The name of the dataset column containing the text data. Defaults to 'text'"},
+    )
+    max_duration_in_seconds: float = field(
+        default=20.0,
+        metadata={
+            "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`"
+        },
+    )
+    min_duration_in_seconds: float = field(
+        default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"}
+    )
+    max_label_length: Optional[int] = field(
+        default=512,
+        metadata={
+            "help": "The maximum total sequence length for target text after tokenization. Sequences longer "
+            "than this will be filtered."
+        },
+    )
+    min_label_length: Optional[int] = field(
+        default=2,
+        metadata={
+            "help": "The minimum total sequence length for target text after tokenization. Sequences shorter "
+            "than this will be filtered."
+        },
+    )
+    pad_input_to_multiple_of: Optional[int] = field(
+        default=32000,
+        metadata={
+            "help": "If set will pad the input sequence to a multiple of the provided value. "
+            "This is important to avoid triggering recompilations on TPU."
+        },
+    )
+    pad_target_to_multiple_of: Optional[int] = field(
+        default=None,
+        metadata={
+            "help": "If set will pad the target sequence to a multiple of the provided value. "
+            "This is important to avoid triggering recompilations on TPU."
+        },
+    )
+    preprocessing_only: bool = field(
+        default=False,
+        metadata={
+            "help": "Whether to only do data preprocessing and skip training. "
+            "This is especially useful when data preprocessing errors out in distributed training due to timeout. 
"
+            "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` "
+            "so that the cached datasets can consequently be loaded in distributed training"
+        },
+    )
+    train_split_name: str = field(
+        default="train",
+        metadata={
+            "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'"
+        },
+    )
+    eval_split_name: str = field(
+        default="validation",
+        metadata={
+            "help": "The name of the evaluation data set split to use (via the datasets library). Defaults to 'validation'"
+        },
+    )
+    do_lower_case: bool = field(
+        default=True,
+        metadata={"help": "Whether the target text should be lower cased."},
+    )
+    wandb_project: str = field(
+        default="flax-speech-recognition-ctc",
+        metadata={"help": "The name of the wandb project."},
+    )
+    wandb_name: str = field(
+        default=None,
+        metadata={"help": "The name of the wandb run."},
+    )
+    wandb_job_type: str = field(
+        default="CTC",
+        metadata={"help": "The name of the wandb job type."},
+    )
+    test_split_name: str = field(
+        default="test",
+        metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"},
+    )
+    remove_punctuation: bool = field(
+        default=False, metadata={"help": "Whether or not to remove punctuation during training."}
+    )
+
+
+# @flax.struct.dataclass
+@dataclass
+class FlaxTrainingArguments(TrainingArguments):
+    precision: str = field(
+        default="full",
+        metadata={
+            "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision"
+            "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**"
+        },
+    )
+    matmul_precision: str = field(
+        default="default",
+        metadata={
+            "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. 
" + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. " + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit
+        next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi
+
+        return (next_phi, next_emit), (next_phi, next_emit)
+
+    xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0)))
+    _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs)
+
+    # last row needs to be updated with the last epsilon transition
+    logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1]))
+    logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last)
+
+    # extract per_seq_loss
+    one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1)  # [B, N+1]
+    per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot)
+
+    if loss_reduction == "mean":
+        target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1)
+        loss = (per_seq_loss / target_lengths).mean()
+    elif loss_reduction == "sum":
+        loss = per_seq_loss.sum()
+    else:
+        loss = per_seq_loss
+
+    if not output_emission_dict:
+        return loss
+
+    return loss, {
+        "logalpha_phi": logalpha_phi,
+        "logalpha_emit": logalpha_emit,
+        "logprobs_phi": logprobs_phi,
+        "logprobs_emit": logprobs_emit,
+    }
+
+
+def make_dataset(seed=42):
+    # Pre-processing dataset
+    import re
+
+    def map_nst(entry):
+        text = entry["text"].lower()
+        text = text.replace("(...vær stille under dette opptaket...)", "")
+        text = re.sub('[áàâ]', 'a', text)
+        text = re.sub('[ä]', 'æ', text)
+        text = re.sub('[éèëê]', 'e', text)
+        text = re.sub('[íìïî]', 'i', text)
+        text = re.sub('[óòöô]', 'o', text)
+        text = re.sub('[ö]', 'ø', text)
+        text = re.sub('[ç]', 'c', text)
+        text = re.sub('[úùüû]', 'u', text)
+        # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text)
+        text = re.sub('\s+', ' ', text)
+        return {"text": text}
+
+    def filter_nst(entry):
+        if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)):
+            return False  # Too short
+        if re.match("pIW|CA", entry["type"]):
+            return False  # 
Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map(map_npsc).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", "audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", 
"test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise ValueError( 
+ "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_184558-17ksemgv/files/config.yaml b/wandb/run-20220729_184558-17ksemgv/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d4d0b698347efd2ad514df94f93abca922e9980 --- /dev/null +++ b/wandb/run-20220729_184558-17ksemgv/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659120358 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_184558-17ksemgv/files/output.log b/wandb/run-20220729_184558-17ksemgv/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..e398040d6c714dd10c8d12b43106aa83ca649fb4 --- /dev/null +++ b/wandb/run-20220729_184558-17ksemgv/files/output.log @@ -0,0 +1,125 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=500, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_18-45-54_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=32, +per_device_train_batch_size=32, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=500, +save_strategy=steps, 
+save_total_limit=3, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +Downloading and preparing dataset nst/no-close to /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53... +Downloading data files: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 6912.42it/s] +Downloading data files: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 3877.63it/s] +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1596, in + main() + File "run_flax_speech_recognition_ctc.py", line 870, in main + raw_datasets = make_dataset(seed=training_args.seed) + File "run_flax_speech_recognition_ctc.py", line 802, in make_dataset + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + File "/data/flax/lib/python3.8/site-packages/datasets/load.py", line 1746, in load_dataset + builder_instance.download_and_prepare( + File "/data/flax/lib/python3.8/site-packages/datasets/builder.py", line 704, in download_and_prepare + self._download_and_prepare( + File "/data/flax/lib/python3.8/site-packages/datasets/builder.py", line 1227, in _download_and_prepare + super()._download_and_prepare(dl_manager, verify_infos, check_duplicate_keys=verify_infos) + File "/data/flax/lib/python3.8/site-packages/datasets/builder.py", line 795, in 
_download_and_prepare + raise OSError( +OSError: Cannot find data file. +Original error: +sndfile library not found \ No newline at end of file diff --git a/wandb/run-20220729_184558-17ksemgv/files/requirements.txt b/wandb/run-20220729_184558-17ksemgv/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..900d3d3303fe2688be1dc797f590f7b5e7fe6e22 --- /dev/null +++ b/wandb/run-20220729_184558-17ksemgv/files/requirements.txt @@ -0,0 +1,149 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 
+parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pygments==2.11.1 +pyparsing==3.0.6 +python-dateutil==2.8.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +soundfile==0.10.3.post1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.11.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220729_184558-17ksemgv/files/wandb-metadata.json b/wandb/run-20220729_184558-17ksemgv/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..0176e229ced665dfe75d1818d378c53db5213a3e --- /dev/null +++ b/wandb/run-20220729_184558-17ksemgv/files/wandb-metadata.json @@ -0,0 +1,67 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-29T18:46:01.736927", + "startedAt": "2022-07-29T18:45:58.536643", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + 
"--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=32", + "--per_device_eval_batch_size=32", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=500", + "--eval_steps=500", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=3", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project", + "wav2vec2", + "--wandb_name", + "wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220729_184558-17ksemgv/files/wandb-summary.json b/wandb/run-20220729_184558-17ksemgv/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..2bc5aefdc7f6669183104a6d63606b2dade460cd --- /dev/null +++ b/wandb/run-20220729_184558-17ksemgv/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 30}} \ No newline at end of 
file diff --git a/wandb/run-20220729_184558-17ksemgv/logs/debug-internal.log b/wandb/run-20220729_184558-17ksemgv/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..700123cf57ecd3f1f31758c0c6a37af64dcdb384 --- /dev/null +++ b/wandb/run-20220729_184558-17ksemgv/logs/debug-internal.log @@ -0,0 +1,155 @@ +2022-07-29 18:45:59,355 INFO MainThread:151536 [internal.py:wandb_internal():87] W&B internal server running at pid: 151536, started at: 2022-07-29 18:45:59.355794 +2022-07-29 18:45:59,357 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: check_version +2022-07-29 18:45:59,357 INFO WriterThread:151536 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/run-17ksemgv.wandb +2022-07-29 18:45:59,358 DEBUG SenderThread:151536 [sender.py:send():234] send: header +2022-07-29 18:45:59,358 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: check_version +2022-07-29 18:45:59,396 DEBUG SenderThread:151536 [sender.py:send():234] send: run +2022-07-29 18:45:59,569 INFO SenderThread:151536 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files +2022-07-29 18:45:59,569 INFO SenderThread:151536 [sender.py:_start_run_threads():804] run started: 17ksemgv with start time 1659120358 +2022-07-29 18:45:59,569 DEBUG SenderThread:151536 [sender.py:send():234] send: summary +2022-07-29 18:45:59,570 INFO SenderThread:151536 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 18:45:59,570 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: run_start +2022-07-29 18:46:00,572 INFO Thread-8 :151536 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/wandb-summary.json +2022-07-29 18:46:01,736 DEBUG HandlerThread:151536 [meta.py:__init__():40] meta init +2022-07-29 
18:46:01,736 DEBUG HandlerThread:151536 [meta.py:__init__():54] meta init done +2022-07-29 18:46:01,736 DEBUG HandlerThread:151536 [meta.py:probe():214] probe +2022-07-29 18:46:01,737 DEBUG HandlerThread:151536 [meta.py:_setup_git():204] setup git +2022-07-29 18:46:01,767 DEBUG HandlerThread:151536 [meta.py:_setup_git():211] setup git done +2022-07-29 18:46:01,767 DEBUG HandlerThread:151536 [meta.py:_save_code():92] save code +2022-07-29 18:46:01,778 DEBUG HandlerThread:151536 [meta.py:_save_code():113] save code done +2022-07-29 18:46:01,778 DEBUG HandlerThread:151536 [meta.py:_save_patches():130] save patches +2022-07-29 18:46:01,832 DEBUG HandlerThread:151536 [meta.py:_save_patches():172] save patches done +2022-07-29 18:46:01,832 DEBUG HandlerThread:151536 [meta.py:_save_pip():58] save pip +2022-07-29 18:46:01,833 DEBUG HandlerThread:151536 [meta.py:_save_pip():72] save pip done +2022-07-29 18:46:01,833 DEBUG HandlerThread:151536 [meta.py:probe():252] probe done +2022-07-29 18:46:01,836 DEBUG SenderThread:151536 [sender.py:send():234] send: files +2022-07-29 18:46:01,836 INFO SenderThread:151536 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-29 18:46:01,836 INFO SenderThread:151536 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-29 18:46:01,841 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:46:01,842 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:46:02,351 INFO Thread-11 :151536 [upload_job.py:push():137] Uploaded file /tmp/tmpnc48zcdrwandb/1iq31jnf-wandb-metadata.json +2022-07-29 18:46:02,540 INFO Thread-12 :151536 [upload_job.py:push():137] Uploaded file /tmp/tmpnc48zcdrwandb/18lhcemh-code/run_flax_speech_recognition_ctc.py +2022-07-29 18:46:02,583 INFO Thread-8 :151536 [dir_watcher.py:_on_file_created():217] file/dir created: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/code/run_flax_speech_recognition_ctc.py +2022-07-29 18:46:02,583 INFO Thread-8 :151536 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/output.log +2022-07-29 18:46:02,583 INFO Thread-8 :151536 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/wandb-metadata.json +2022-07-29 18:46:02,584 INFO Thread-8 :151536 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/requirements.txt +2022-07-29 18:46:02,584 INFO Thread-8 :151536 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/code +2022-07-29 18:46:04,584 INFO Thread-8 :151536 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/output.log +2022-07-29 18:46:06,585 INFO Thread-8 :151536 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/output.log +2022-07-29 18:46:16,994 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:46:16,994 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:46:18,589 INFO Thread-8 :151536 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/output.log +2022-07-29 18:46:22,591 INFO Thread-8 :151536 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/output.log +2022-07-29 18:46:29,819 DEBUG SenderThread:151536 [sender.py:send():234] send: stats +2022-07-29 18:46:30,399 DEBUG HandlerThread:151536 [handler.py:handle_request():130] 
handle_request: poll_exit +2022-07-29 18:46:30,399 DEBUG SenderThread:151536 [sender.py:send():234] send: telemetry +2022-07-29 18:46:30,400 DEBUG SenderThread:151536 [sender.py:send():234] send: exit +2022-07-29 18:46:30,400 INFO SenderThread:151536 [sender.py:send_exit():366] handling exit code: 1 +2022-07-29 18:46:30,400 INFO SenderThread:151536 [sender.py:send_exit():368] handling runtime: 30 +2022-07-29 18:46:30,400 INFO SenderThread:151536 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 18:46:30,401 INFO SenderThread:151536 [sender.py:send_exit():374] send defer +2022-07-29 18:46:30,401 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:30,401 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:46:30,401 INFO HandlerThread:151536 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-29 18:46:30,402 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: defer +2022-07-29 18:46:30,402 INFO SenderThread:151536 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-29 18:46:30,402 INFO SenderThread:151536 [sender.py:transition_state():387] send defer: 1 +2022-07-29 18:46:30,402 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:46:30,402 INFO HandlerThread:151536 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-29 18:46:30,421 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: defer +2022-07-29 18:46:30,421 INFO SenderThread:151536 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-29 18:46:30,422 INFO SenderThread:151536 [sender.py:transition_state():387] send defer: 2 +2022-07-29 18:46:30,422 DEBUG SenderThread:151536 [sender.py:send():234] send: stats +2022-07-29 18:46:30,422 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:46:30,422 INFO 
HandlerThread:151536 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-29 18:46:30,422 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: defer +2022-07-29 18:46:30,422 INFO SenderThread:151536 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-29 18:46:30,422 INFO SenderThread:151536 [sender.py:transition_state():387] send defer: 3 +2022-07-29 18:46:30,423 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:46:30,423 INFO HandlerThread:151536 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-29 18:46:30,423 DEBUG SenderThread:151536 [sender.py:send():234] send: summary +2022-07-29 18:46:30,423 INFO SenderThread:151536 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 18:46:30,423 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: defer +2022-07-29 18:46:30,423 INFO SenderThread:151536 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-29 18:46:30,423 INFO SenderThread:151536 [sender.py:transition_state():387] send defer: 4 +2022-07-29 18:46:30,424 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:46:30,424 INFO HandlerThread:151536 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-29 18:46:30,424 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: defer +2022-07-29 18:46:30,424 INFO SenderThread:151536 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-29 18:46:30,503 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:46:30,585 INFO SenderThread:151536 [sender.py:transition_state():387] send defer: 5 +2022-07-29 18:46:30,585 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:30,585 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: defer +2022-07-29 
18:46:30,585 INFO HandlerThread:151536 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-29 18:46:30,586 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: defer +2022-07-29 18:46:30,586 INFO SenderThread:151536 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-29 18:46:30,586 INFO SenderThread:151536 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-29 18:46:30,594 INFO SenderThread:151536 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/wandb-summary.json +2022-07-29 18:46:30,594 INFO SenderThread:151536 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/output.log +2022-07-29 18:46:30,594 INFO SenderThread:151536 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/config.yaml +2022-07-29 18:46:30,594 INFO SenderThread:151536 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files +2022-07-29 18:46:30,594 INFO SenderThread:151536 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/config.yaml config.yaml +2022-07-29 18:46:30,594 INFO SenderThread:151536 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/requirements.txt requirements.txt +2022-07-29 18:46:30,595 INFO SenderThread:151536 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/output.log output.log +2022-07-29 18:46:30,595 INFO SenderThread:151536 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/wandb-summary.json wandb-summary.json +2022-07-29 18:46:30,595 INFO SenderThread:151536 [dir_watcher.py:finish():327] scan save: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/wandb-metadata.json wandb-metadata.json +2022-07-29 18:46:30,604 INFO SenderThread:151536 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-29 18:46:30,604 INFO SenderThread:151536 [sender.py:transition_state():387] send defer: 6 +2022-07-29 18:46:30,604 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:46:30,605 INFO HandlerThread:151536 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-29 18:46:30,605 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: defer +2022-07-29 18:46:30,605 INFO SenderThread:151536 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-29 18:46:30,605 INFO SenderThread:151536 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 18:46:30,686 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:46:30,686 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:30,789 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:46:30,789 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:30,891 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:46:30,891 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:30,992 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:46:30,993 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:31,082 INFO Thread-13 :151536 [upload_job.py:push():137] Uploaded file 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/config.yaml +2022-07-29 18:46:31,094 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:46:31,094 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:31,107 INFO Thread-16 :151536 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/wandb-summary.json +2022-07-29 18:46:31,119 INFO Thread-15 :151536 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/output.log +2022-07-29 18:46:31,195 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:46:31,196 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:31,297 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:46:31,297 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:31,319 INFO Thread-14 :151536 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/files/requirements.txt +2022-07-29 18:46:31,398 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:46:31,399 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:31,500 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:46:31,500 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:31,519 INFO Thread-7 :151536 [sender.py:transition_state():387] send defer: 7 +2022-07-29 18:46:31,520 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:46:31,520 INFO HandlerThread:151536 [handler.py:handle_request_defer():147] handle defer: 7 
+2022-07-29 18:46:31,520 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: defer +2022-07-29 18:46:31,520 INFO SenderThread:151536 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-29 18:46:31,601 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:46:31,949 INFO SenderThread:151536 [sender.py:transition_state():387] send defer: 8 +2022-07-29 18:46:31,949 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:31,950 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:46:31,950 INFO HandlerThread:151536 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-29 18:46:31,950 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: defer +2022-07-29 18:46:31,950 INFO SenderThread:151536 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-29 18:46:31,950 INFO SenderThread:151536 [sender.py:transition_state():387] send defer: 9 +2022-07-29 18:46:31,950 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:46:31,950 DEBUG SenderThread:151536 [sender.py:send():234] send: final +2022-07-29 18:46:31,951 INFO HandlerThread:151536 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-29 18:46:31,951 DEBUG SenderThread:151536 [sender.py:send():234] send: footer +2022-07-29 18:46:31,951 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: defer +2022-07-29 18:46:31,951 INFO SenderThread:151536 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-29 18:46:32,051 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:46:32,051 DEBUG SenderThread:151536 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:46:32,051 INFO SenderThread:151536 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 18:46:32,311 
DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: get_summary +2022-07-29 18:46:32,312 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-29 18:46:32,312 DEBUG HandlerThread:151536 [handler.py:handle_request():130] handle_request: shutdown +2022-07-29 18:46:32,312 INFO HandlerThread:151536 [handler.py:finish():731] shutting down handler +2022-07-29 18:46:32,951 INFO WriterThread:151536 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/run-17ksemgv.wandb +2022-07-29 18:46:33,310 INFO SenderThread:151536 [sender.py:finish():1070] shutting down sender +2022-07-29 18:46:33,310 INFO SenderThread:151536 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 18:46:33,310 INFO SenderThread:151536 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 18:46:33,312 INFO MainThread:151536 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220729_184558-17ksemgv/logs/debug.log b/wandb/run-20220729_184558-17ksemgv/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..a8ce84d6f5e6e827e2c5c316e99533f60ce6d4ab --- /dev/null +++ b/wandb/run-20220729_184558-17ksemgv/logs/debug.log @@ -0,0 +1,148 @@ +2022-07-29 18:45:58,537 INFO MainThread:150277 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-29 18:45:58,538 INFO MainThread:150277 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-29 18:45:58,538 INFO MainThread:150277 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/logs/debug.log +2022-07-29 18:45:58,538 INFO MainThread:150277 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_184558-17ksemgv/logs/debug-internal.log +2022-07-29 18:45:58,538 INFO MainThread:150277 [wandb_init.py:init():404] calling 
init triggers +2022-07-29 18:45:58,538 INFO MainThread:150277 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-29 18:45:58,538 INFO MainThread:150277 [wandb_init.py:init():460] starting backend +2022-07-29 18:45:58,538 INFO MainThread:150277 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-29 18:45:58,564 INFO MainThread:150277 [backend.py:ensure_launched():216] starting backend process... +2022-07-29 18:45:58,588 INFO MainThread:150277 [backend.py:ensure_launched():221] started backend process with pid: 151536 +2022-07-29 18:45:58,590 INFO MainThread:150277 [wandb_init.py:init():469] backend started and connected +2022-07-29 18:45:58,602 INFO MainThread:150277 [wandb_init.py:init():533] updated telemetry +2022-07-29 18:45:58,661 INFO MainThread:150277 [wandb_init.py:init():563] communicating current version +2022-07-29 18:45:59,394 INFO MainThread:150277 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-29 18:45:59,394 INFO MainThread:150277 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-29 18:45:59,570 INFO MainThread:150277 [wandb_init.py:init():606] starting run threads in backend +2022-07-29 18:46:01,841 INFO MainThread:150277 [wandb_run.py:_console_start():1810] atexit reg +2022-07-29 18:46:01,841 INFO MainThread:150277 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-29 18:46:01,842 INFO MainThread:150277 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-29 18:46:01,844 INFO MainThread:150277 [wandb_run.py:_redirect():1745] Redirects installed. 
+2022-07-29 18:46:01,844 INFO MainThread:150277 [wandb_init.py:init():633] run started, returning control to user process +2022-07-29 18:46:28,169 INFO MainThread:150277 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-29 18:46:28,175 INFO MainThread:150277 [wandb_run.py:_restore():1752] restore +2022-07-29 18:46:30,401 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73662 + total_bytes: 73662 +} + +2022-07-29 18:46:30,586 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73662 + total_bytes: 73662 +} + +2022-07-29 18:46:30,687 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73662 + total_bytes: 81928 +} + +2022-07-29 18:46:30,790 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 79233 + total_bytes: 81928 +} + +2022-07-29 18:46:30,892 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 79260 + total_bytes: 81928 +} + +2022-07-29 18:46:30,993 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 79260 + total_bytes: 81928 +} + +2022-07-29 18:46:31,095 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 81928 + total_bytes: 81928 +} + +2022-07-29 18:46:31,196 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} 
+pusher_stats { + uploaded_bytes: 81928 + total_bytes: 81928 +} + +2022-07-29 18:46:31,298 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 81928 + total_bytes: 81928 +} + +2022-07-29 18:46:31,399 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 81928 + total_bytes: 81928 +} + +2022-07-29 18:46:31,501 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 81928 + total_bytes: 81928 +} + +2022-07-29 18:46:31,950 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 81928 + total_bytes: 81928 +} + +2022-07-29 18:46:32,310 INFO MainThread:150277 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 81928 + total_bytes: 81928 +} +local_info { +} + +2022-07-29 18:46:33,786 INFO MainThread:150277 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220729_184558-17ksemgv/run-17ksemgv.wandb b/wandb/run-20220729_184558-17ksemgv/run-17ksemgv.wandb new file mode 100644 index 0000000000000000000000000000000000000000..7483c2dbcaff83cfe1f43bf70a9c2bfb08124aa2 --- /dev/null +++ b/wandb/run-20220729_184558-17ksemgv/run-17ksemgv.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eeafbf0c6357efcfabe27fe93edbeb5d9c48f7372da0e68e6b6b22a7548de9df +size 6320 diff --git a/wandb/run-20220729_184945-3vhqjnn9/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220729_184945-3vhqjnn9/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 
0000000000000000000000000000000000000000..934c1109c5f92a96e47636edcd49612a996dd9fe --- /dev/null +++ b/wandb/run-20220729_184945-3vhqjnn9/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1596 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + return False # 
Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map(map_npsc).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", "audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", 
"test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise ValueError( 
+ "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_184945-3vhqjnn9/files/config.yaml b/wandb/run-20220729_184945-3vhqjnn9/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..23749e5e0dfae07e8573d73012b1a148268b2c54 --- /dev/null +++ b/wandb/run-20220729_184945-3vhqjnn9/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659120585 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_184945-3vhqjnn9/files/output.log b/wandb/run-20220729_184945-3vhqjnn9/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..abb7d0c55175cf5db1100ac493415dbe07a2505a --- /dev/null +++ b/wandb/run-20220729_184945-3vhqjnn9/files/output.log @@ -0,0 +1,313 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=500, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_18-49-41_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=32, +per_device_train_batch_size=32, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=500, +save_strategy=steps, 
+save_total_limit=3, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +Downloading and preparing dataset nst/no-close to /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53... +Downloading data files: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 6694.23it/s] +Downloading data files: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 3591.01it/s] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Generating test split: 72763 examples [00:49, 2217.99 examples/s] + +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 32.19it/s] +Downloading builder script: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.20k/9.20k [00:00<00:00, 7.87MB/s] +Downloading and preparing dataset npsc/16K_mp3 to 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc... +Downloading data: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 51.1k/51.1k [00:00<00:00, 299kB/s] +Downloading data: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 498k/498k [00:00<00:00, 1.45MB/s] +Downloading data: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 525k/525k [00:00<00:00, 1.23MB/s] +Downloading data: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 758k/758k [00:00<00:00, 1.76MB/s] +Downloading data: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 681k/681k [00:00<00:00, 1.58MB/s] +Downloading data: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 852k/852k [00:00<00:00, 1.66MB/s] +Downloading data: 
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.60M/1.60M [00:00<00:00, 3.08MB/s] +Downloading data: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.27M/1.27M [00:00<00:00, 2.45MB/s] +Downloading data: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.58M/1.58M [00:00<00:00, 3.05MB/s] +Downloading data: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.89M/1.89M [00:00<00:00, 3.17MB/s] +Downloading data: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.40M/1.40M [00:00<00:00, 2.72MB/s] +Downloading data files #11: 0%| | 0/2 [00:00=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + return False # 
Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map(map_npsc).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", "audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", 
"test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise ValueError( 
+ "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_185656-3cqptots/files/config.yaml b/wandb/run-20220729_185656-3cqptots/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12a5489fcc147454cdebaaa0ceeac3c7c8066a92 --- /dev/null +++ b/wandb/run-20220729_185656-3cqptots/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659121016 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_185656-3cqptots/files/output.log b/wandb/run-20220729_185656-3cqptots/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..cf01da2384c1193107b337f68af2be8a462fe957 --- /dev/null +++ b/wandb/run-20220729_185656-3cqptots/files/output.log @@ -0,0 +1,158 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_18-56-52_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=32, +per_device_train_batch_size=32, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, 
+save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 32.04it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 451.08it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow + 0%| | 0/256 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 870, in main + raw_datasets = make_dataset(seed=training_args.seed) + File "run_flax_speech_recognition_ctc.py", line 811, in make_dataset + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + File 
"/data/flax/lib/python3.8/site-packages/datasets/dataset_dict.py", line 865, in filter + { + File "/data/flax/lib/python3.8/site-packages/datasets/dataset_dict.py", line 866, in + k: dataset.filter( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 524, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/fingerprint.py", line 480, in wrapper + out = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2904, in filter + indices = self.map( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2387, in map + return self._map_single( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 557, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 524, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/fingerprint.py", line 480, in wrapper + out = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2775, in _map_single + batch = apply_function_on_filtered_inputs( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2655, in apply_function_on_filtered_inputs + processed_inputs = function(*fn_args, *additional_args, **fn_kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2347, in decorated + result = f(decorated_item, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4933, in get_indices_from_mask_function + example = {key: batch[key][i] for key in batch} + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4933, in + example = {key: batch[key][i] for key in batch} + File 
"/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 133, in __getitem__ + values = [ + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 134, in + decode_nested_example(self.features[key], value) if value is not None else None for value in values + File "/data/flax/lib/python3.8/site-packages/datasets/features/features.py", line 1260, in decode_nested_example + return schema.decode_example(obj, token_per_repo_id=token_per_repo_id) if obj is not None else None + File "/data/flax/lib/python3.8/site-packages/datasets/features/audio.py", line 144, in decode_example + array, sampling_rate = self._decode_mp3(file if file else path) + File "/data/flax/lib/python3.8/site-packages/datasets/features/audio.py", line 293, in _decode_mp3 + array, sampling_rate = torchaudio.load(path_or_file, format="mp3") + File "/data/flax/lib/python3.8/site-packages/torchaudio/backend/sox_io_backend.py", line 214, in load + return _fallback_load_fileobj(filepath, frame_offset, num_frames, normalize, channels_first, format) +NameError: name '_fallback_load_fileobj' is not defined \ No newline at end of file diff --git a/wandb/run-20220729_185656-3cqptots/files/requirements.txt b/wandb/run-20220729_185656-3cqptots/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..82f0601b1659d07d7747ba139250246bf7eae997 --- /dev/null +++ b/wandb/run-20220729_185656-3cqptots/files/requirements.txt @@ -0,0 +1,150 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 
+etils==0.6.0 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pygments==2.11.1 +pyparsing==3.0.6 +python-dateutil==2.8.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +soundfile==0.10.3.post1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 
+threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0 +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220729_185656-3cqptots/files/wandb-metadata.json b/wandb/run-20220729_185656-3cqptots/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..c865d99832a55a713188d199f8ecaa68e7e23b19 --- /dev/null +++ b/wandb/run-20220729_185656-3cqptots/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-29T18:57:00.004046", + "startedAt": "2022-07-29T18:56:56.698418", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=NbAiLab/nb-wav2vec2-1b-bokmaal", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=32", + "--per_device_eval_batch_size=32", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + 
"--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220729_185656-3cqptots/files/wandb-summary.json b/wandb/run-20220729_185656-3cqptots/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..b1ac4f7d3564b2fd407d247e6957709faa41a169 --- /dev/null +++ b/wandb/run-20220729_185656-3cqptots/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 9}} \ No newline at end of file diff --git a/wandb/run-20220729_185656-3cqptots/logs/debug-internal.log b/wandb/run-20220729_185656-3cqptots/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..a764a1101709cb1d8e8ad512f5f6066dcb3c7111 --- /dev/null +++ b/wandb/run-20220729_185656-3cqptots/logs/debug-internal.log @@ -0,0 +1,149 @@ +2022-07-29 18:56:57,526 INFO MainThread:165164 [internal.py:wandb_internal():87] W&B internal server running at pid: 165164, started at: 2022-07-29 18:56:57.526162 +2022-07-29 18:56:57,528 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: check_version +2022-07-29 18:56:57,528 INFO WriterThread:165164 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/run-3cqptots.wandb +2022-07-29 18:56:57,529 DEBUG SenderThread:165164 [sender.py:send():234] send: header +2022-07-29 18:56:57,529 DEBUG SenderThread:165164 
[sender.py:send_request():248] send_request: check_version +2022-07-29 18:56:57,565 DEBUG SenderThread:165164 [sender.py:send():234] send: run +2022-07-29 18:56:57,749 INFO SenderThread:165164 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files +2022-07-29 18:56:57,749 INFO SenderThread:165164 [sender.py:_start_run_threads():804] run started: 3cqptots with start time 1659121016 +2022-07-29 18:56:57,750 DEBUG SenderThread:165164 [sender.py:send():234] send: summary +2022-07-29 18:56:57,750 INFO SenderThread:165164 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 18:56:57,752 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: run_start +2022-07-29 18:56:58,757 INFO Thread-8 :165164 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/wandb-summary.json +2022-07-29 18:57:00,003 DEBUG HandlerThread:165164 [meta.py:__init__():40] meta init +2022-07-29 18:57:00,003 DEBUG HandlerThread:165164 [meta.py:__init__():54] meta init done +2022-07-29 18:57:00,004 DEBUG HandlerThread:165164 [meta.py:probe():214] probe +2022-07-29 18:57:00,005 DEBUG HandlerThread:165164 [meta.py:_setup_git():204] setup git +2022-07-29 18:57:00,034 DEBUG HandlerThread:165164 [meta.py:_setup_git():211] setup git done +2022-07-29 18:57:00,034 DEBUG HandlerThread:165164 [meta.py:_save_code():92] save code +2022-07-29 18:57:00,046 DEBUG HandlerThread:165164 [meta.py:_save_code():113] save code done +2022-07-29 18:57:00,046 DEBUG HandlerThread:165164 [meta.py:_save_patches():130] save patches +2022-07-29 18:57:00,099 DEBUG HandlerThread:165164 [meta.py:_save_patches():172] save patches done +2022-07-29 18:57:00,100 DEBUG HandlerThread:165164 [meta.py:_save_pip():58] save pip +2022-07-29 18:57:00,100 DEBUG HandlerThread:165164 [meta.py:_save_pip():72] save pip done +2022-07-29 18:57:00,100 DEBUG 
HandlerThread:165164 [meta.py:probe():252] probe done +2022-07-29 18:57:00,103 DEBUG SenderThread:165164 [sender.py:send():234] send: files +2022-07-29 18:57:00,103 INFO SenderThread:165164 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-29 18:57:00,104 INFO SenderThread:165164 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-29 18:57:00,109 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 18:57:00,109 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: stop_status +2022-07-29 18:57:00,591 INFO Thread-11 :165164 [upload_job.py:push():137] Uploaded file /tmp/tmp02y1jub5wandb/axei5139-wandb-metadata.json +2022-07-29 18:57:00,757 INFO Thread-8 :165164 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/code/run_flax_speech_recognition_ctc.py +2022-07-29 18:57:00,758 INFO Thread-8 :165164 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/wandb-metadata.json +2022-07-29 18:57:00,758 INFO Thread-8 :165164 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/output.log +2022-07-29 18:57:00,758 INFO Thread-8 :165164 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/requirements.txt +2022-07-29 18:57:00,758 INFO Thread-8 :165164 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/code +2022-07-29 18:57:00,864 INFO Thread-12 :165164 [upload_job.py:push():137] Uploaded file /tmp/tmp02y1jub5wandb/ld126f6g-code/run_flax_speech_recognition_ctc.py +2022-07-29 18:57:02,758 INFO Thread-8 :165164 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/output.log +2022-07-29 18:57:04,759 INFO Thread-8 :165164 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/output.log +2022-07-29 18:57:06,760 INFO Thread-8 :165164 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/output.log +2022-07-29 18:57:07,150 DEBUG SenderThread:165164 [sender.py:send():234] send: telemetry +2022-07-29 18:57:07,150 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:57:07,151 DEBUG SenderThread:165164 [sender.py:send():234] send: exit +2022-07-29 18:57:07,151 INFO SenderThread:165164 [sender.py:send_exit():366] handling exit code: 1 +2022-07-29 18:57:07,151 INFO SenderThread:165164 [sender.py:send_exit():368] handling runtime: 9 +2022-07-29 18:57:07,152 INFO SenderThread:165164 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 18:57:07,152 INFO SenderThread:165164 [sender.py:send_exit():374] send defer +2022-07-29 18:57:07,152 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:57:07,153 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:57:07,153 INFO HandlerThread:165164 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-29 18:57:07,153 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: defer +2022-07-29 18:57:07,153 INFO SenderThread:165164 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-29 18:57:07,153 INFO SenderThread:165164 [sender.py:transition_state():387] send defer: 1 +2022-07-29 18:57:07,153 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:57:07,153 INFO HandlerThread:165164 [handler.py:handle_request_defer():147] handle 
defer: 1 +2022-07-29 18:57:07,230 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: defer +2022-07-29 18:57:07,231 INFO SenderThread:165164 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-29 18:57:07,231 INFO SenderThread:165164 [sender.py:transition_state():387] send defer: 2 +2022-07-29 18:57:07,231 DEBUG SenderThread:165164 [sender.py:send():234] send: stats +2022-07-29 18:57:07,232 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:57:07,232 INFO HandlerThread:165164 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-29 18:57:07,232 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: defer +2022-07-29 18:57:07,232 INFO SenderThread:165164 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-29 18:57:07,232 INFO SenderThread:165164 [sender.py:transition_state():387] send defer: 3 +2022-07-29 18:57:07,232 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:57:07,232 INFO HandlerThread:165164 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-29 18:57:07,232 DEBUG SenderThread:165164 [sender.py:send():234] send: summary +2022-07-29 18:57:07,233 INFO SenderThread:165164 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 18:57:07,233 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: defer +2022-07-29 18:57:07,233 INFO SenderThread:165164 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-29 18:57:07,233 INFO SenderThread:165164 [sender.py:transition_state():387] send defer: 4 +2022-07-29 18:57:07,233 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:57:07,233 INFO HandlerThread:165164 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-29 18:57:07,234 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: defer 
+2022-07-29 18:57:07,234 INFO SenderThread:165164 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-29 18:57:07,254 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:57:07,439 INFO SenderThread:165164 [sender.py:transition_state():387] send defer: 5 +2022-07-29 18:57:07,439 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:57:07,439 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:57:07,439 INFO HandlerThread:165164 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-29 18:57:07,440 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: defer +2022-07-29 18:57:07,440 INFO SenderThread:165164 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-29 18:57:07,440 INFO SenderThread:165164 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-29 18:57:07,540 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:57:07,760 INFO Thread-8 :165164 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/config.yaml +2022-07-29 18:57:07,761 INFO SenderThread:165164 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/wandb-summary.json +2022-07-29 18:57:07,761 INFO SenderThread:165164 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/output.log +2022-07-29 18:57:07,761 INFO SenderThread:165164 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files +2022-07-29 18:57:07,761 INFO SenderThread:165164 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/config.yaml config.yaml +2022-07-29 
18:57:07,761 INFO SenderThread:165164 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/requirements.txt requirements.txt +2022-07-29 18:57:07,762 INFO SenderThread:165164 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/output.log output.log +2022-07-29 18:57:07,762 INFO SenderThread:165164 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/wandb-summary.json wandb-summary.json +2022-07-29 18:57:07,762 INFO SenderThread:165164 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/wandb-metadata.json wandb-metadata.json +2022-07-29 18:57:07,765 INFO SenderThread:165164 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-29 18:57:07,765 INFO SenderThread:165164 [sender.py:transition_state():387] send defer: 6 +2022-07-29 18:57:07,765 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:57:07,766 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:57:07,766 INFO HandlerThread:165164 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-29 18:57:07,768 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: defer +2022-07-29 18:57:07,768 INFO SenderThread:165164 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-29 18:57:07,768 INFO SenderThread:165164 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 18:57:07,867 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:57:07,867 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:57:07,968 DEBUG 
HandlerThread:165164 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:57:07,968 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:57:08,070 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:57:08,070 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:57:08,171 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:57:08,171 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:57:08,235 INFO Thread-16 :165164 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/wandb-summary.json +2022-07-29 18:57:08,266 INFO Thread-13 :165164 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/config.yaml +2022-07-29 18:57:08,268 INFO Thread-15 :165164 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/output.log +2022-07-29 18:57:08,273 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:57:08,273 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:57:08,324 INFO Thread-14 :165164 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/files/requirements.txt +2022-07-29 18:57:08,374 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:57:08,374 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:57:08,476 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:57:08,476 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:57:08,524 INFO 
Thread-7 :165164 [sender.py:transition_state():387] send defer: 7 +2022-07-29 18:57:08,524 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:57:08,524 INFO HandlerThread:165164 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-29 18:57:08,525 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: defer +2022-07-29 18:57:08,525 INFO SenderThread:165164 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-29 18:57:08,577 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:57:08,677 INFO SenderThread:165164 [sender.py:transition_state():387] send defer: 8 +2022-07-29 18:57:08,677 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:57:08,677 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:57:08,677 INFO HandlerThread:165164 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-29 18:57:08,677 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: defer +2022-07-29 18:57:08,677 INFO SenderThread:165164 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-29 18:57:08,677 INFO SenderThread:165164 [sender.py:transition_state():387] send defer: 9 +2022-07-29 18:57:08,678 DEBUG SenderThread:165164 [sender.py:send():234] send: final +2022-07-29 18:57:08,678 DEBUG SenderThread:165164 [sender.py:send():234] send: footer +2022-07-29 18:57:08,678 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: defer +2022-07-29 18:57:08,678 INFO HandlerThread:165164 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-29 18:57:08,678 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: defer +2022-07-29 18:57:08,678 INFO SenderThread:165164 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-29 18:57:08,778 DEBUG HandlerThread:165164 
[handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 18:57:08,778 DEBUG SenderThread:165164 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 18:57:08,778 INFO SenderThread:165164 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 18:57:09,031 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: get_summary +2022-07-29 18:57:09,032 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-29 18:57:09,032 DEBUG HandlerThread:165164 [handler.py:handle_request():130] handle_request: shutdown +2022-07-29 18:57:09,032 INFO HandlerThread:165164 [handler.py:finish():731] shutting down handler +2022-07-29 18:57:09,678 INFO WriterThread:165164 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/run-3cqptots.wandb +2022-07-29 18:57:10,030 INFO SenderThread:165164 [sender.py:finish():1070] shutting down sender +2022-07-29 18:57:10,030 INFO SenderThread:165164 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 18:57:10,030 INFO SenderThread:165164 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 18:57:10,033 INFO MainThread:165164 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220729_185656-3cqptots/logs/debug.log b/wandb/run-20220729_185656-3cqptots/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..9068a1c0745ef93961abd3bfbf0149df54529872 --- /dev/null +++ b/wandb/run-20220729_185656-3cqptots/logs/debug.log @@ -0,0 +1,139 @@ +2022-07-29 18:56:56,699 INFO MainThread:163875 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-29 18:56:56,700 INFO MainThread:163875 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-29 18:56:56,700 INFO MainThread:163875 [wandb_init.py:_log_setup():371] Logging user logs to 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/logs/debug.log +2022-07-29 18:56:56,700 INFO MainThread:163875 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_185656-3cqptots/logs/debug-internal.log +2022-07-29 18:56:56,700 INFO MainThread:163875 [wandb_init.py:init():404] calling init triggers +2022-07-29 18:56:56,700 INFO MainThread:163875 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-29 18:56:56,700 INFO MainThread:163875 [wandb_init.py:init():460] starting backend +2022-07-29 18:56:56,700 INFO MainThread:163875 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-29 18:56:56,728 INFO MainThread:163875 [backend.py:ensure_launched():216] starting backend process... +2022-07-29 18:56:56,753 INFO MainThread:163875 [backend.py:ensure_launched():221] started backend process with pid: 165164 +2022-07-29 18:56:56,754 INFO MainThread:163875 [wandb_init.py:init():469] backend started and connected +2022-07-29 18:56:56,770 INFO MainThread:163875 [wandb_init.py:init():533] updated telemetry +2022-07-29 18:56:56,833 INFO MainThread:163875 [wandb_init.py:init():563] communicating current version +2022-07-29 18:56:57,563 INFO MainThread:163875 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! 
To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-29 18:56:57,564 INFO MainThread:163875 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-29 18:56:57,751 INFO MainThread:163875 [wandb_init.py:init():606] starting run threads in backend +2022-07-29 18:57:00,108 INFO MainThread:163875 [wandb_run.py:_console_start():1810] atexit reg +2022-07-29 18:57:00,108 INFO MainThread:163875 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-29 18:57:00,109 INFO MainThread:163875 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-29 18:57:00,111 INFO MainThread:163875 [wandb_run.py:_redirect():1745] Redirects installed. +2022-07-29 18:57:00,111 INFO MainThread:163875 [wandb_init.py:init():633] run started, returning control to user process +2022-07-29 18:57:04,664 INFO MainThread:163875 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-29 18:57:04,668 INFO MainThread:163875 [wandb_run.py:_restore():1752] restore +2022-07-29 18:57:07,153 INFO MainThread:163875 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 73701 +} + +2022-07-29 18:57:07,439 INFO MainThread:163875 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 73701 +} + +2022-07-29 18:57:07,766 INFO MainThread:163875 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 85482 +} + +2022-07-29 18:57:07,867 INFO MainThread:163875 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 85482 +} + +2022-07-29 18:57:07,969 INFO MainThread:163875 [wandb_run.py:_wait_for_finish():1912] got exit ret: 
file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85482 + total_bytes: 85482 +} + +2022-07-29 18:57:08,071 INFO MainThread:163875 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85482 + total_bytes: 85482 +} + +2022-07-29 18:57:08,172 INFO MainThread:163875 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85482 + total_bytes: 85482 +} + +2022-07-29 18:57:08,273 INFO MainThread:163875 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85482 + total_bytes: 85482 +} + +2022-07-29 18:57:08,375 INFO MainThread:163875 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85482 + total_bytes: 85482 +} + +2022-07-29 18:57:08,476 INFO MainThread:163875 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85482 + total_bytes: 85482 +} + +2022-07-29 18:57:08,677 INFO MainThread:163875 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85482 + total_bytes: 85482 +} + +2022-07-29 18:57:09,030 INFO MainThread:163875 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85482 + total_bytes: 85482 +} +local_info { +} + +2022-07-29 18:57:10,492 INFO MainThread:163875 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220729_185656-3cqptots/run-3cqptots.wandb b/wandb/run-20220729_185656-3cqptots/run-3cqptots.wandb new file mode 100644 index 
0000000000000000000000000000000000000000..d7154a03bc448ca82531e4cd650ba066904bae17 --- /dev/null +++ b/wandb/run-20220729_185656-3cqptots/run-3cqptots.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:375e2b86666767f13b3eeedcc308447a00f857d25fdb68663f6847ba93879a70 +size 9434 diff --git a/wandb/run-20220729_190139-3h19ae7o/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220729_190139-3h19ae7o/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..934c1109c5f92a96e47636edcd49612a996dd9fe --- /dev/null +++ b/wandb/run-20220729_190139-3h19ae7o/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1596 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + return False # 
Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map(map_npsc).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", "audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", 
"test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise ValueError( 
+ "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_190139-3h19ae7o/files/config.yaml b/wandb/run-20220729_190139-3h19ae7o/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ebac1590d3ae1282df9c04e12acc5c399dcd15ce --- /dev/null +++ b/wandb/run-20220729_190139-3h19ae7o/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659121299 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_190139-3h19ae7o/files/output.log b/wandb/run-20220729_190139-3h19ae7o/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..65b7121bb800f0470c1c62bbb9888355968ba245 --- /dev/null +++ b/wandb/run-20220729_190139-3h19ae7o/files/output.log @@ -0,0 +1,158 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_19-01-35_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=32, +per_device_train_batch_size=32, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, 
+save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 85.67it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 450.21it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow + 0%| | 0/256 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 870, in main + raw_datasets = make_dataset(seed=training_args.seed) + File "run_flax_speech_recognition_ctc.py", line 811, in make_dataset + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + File 
"/data/flax/lib/python3.8/site-packages/datasets/dataset_dict.py", line 865, in filter + { + File "/data/flax/lib/python3.8/site-packages/datasets/dataset_dict.py", line 866, in + k: dataset.filter( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 524, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/fingerprint.py", line 480, in wrapper + out = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2904, in filter + indices = self.map( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2387, in map + return self._map_single( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 557, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 524, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/fingerprint.py", line 480, in wrapper + out = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2775, in _map_single + batch = apply_function_on_filtered_inputs( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2655, in apply_function_on_filtered_inputs + processed_inputs = function(*fn_args, *additional_args, **fn_kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2347, in decorated + result = f(decorated_item, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4933, in get_indices_from_mask_function + example = {key: batch[key][i] for key in batch} + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4933, in + example = {key: batch[key][i] for key in batch} + File 
"/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 133, in __getitem__ + values = [ + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 134, in + decode_nested_example(self.features[key], value) if value is not None else None for value in values + File "/data/flax/lib/python3.8/site-packages/datasets/features/features.py", line 1260, in decode_nested_example + return schema.decode_example(obj, token_per_repo_id=token_per_repo_id) if obj is not None else None + File "/data/flax/lib/python3.8/site-packages/datasets/features/audio.py", line 144, in decode_example + array, sampling_rate = self._decode_mp3(file if file else path) + File "/data/flax/lib/python3.8/site-packages/datasets/features/audio.py", line 293, in _decode_mp3 + array, sampling_rate = torchaudio.load(path_or_file, format="mp3") + File "/data/flax/lib/python3.8/site-packages/torchaudio/backend/sox_io_backend.py", line 214, in load + return _fallback_load_fileobj(filepath, frame_offset, num_frames, normalize, channels_first, format) +NameError: name '_fallback_load_fileobj' is not defined \ No newline at end of file diff --git a/wandb/run-20220729_190139-3h19ae7o/files/requirements.txt b/wandb/run-20220729_190139-3h19ae7o/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..f28978ffa0f0a8356a51ec97122622a6097b70e8 --- /dev/null +++ b/wandb/run-20220729_190139-3h19ae7o/files/requirements.txt @@ -0,0 +1,151 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 
+etils==0.6.0 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pygments==2.11.1 +pyparsing==3.0.6 +python-dateutil==2.8.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 
+threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0 +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220729_190139-3h19ae7o/files/wandb-metadata.json b/wandb/run-20220729_190139-3h19ae7o/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8f1072c53903ffa2d648d83b22eae0a97ba719b2 --- /dev/null +++ b/wandb/run-20220729_190139-3h19ae7o/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-29T19:01:42.561481", + "startedAt": "2022-07-29T19:01:39.254799", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=NbAiLab/nb-wav2vec2-1b-bokmaal", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=32", + "--per_device_eval_batch_size=32", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + 
"--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220729_190139-3h19ae7o/files/wandb-summary.json b/wandb/run-20220729_190139-3h19ae7o/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..a594e25ada167c5ad54a828e8a5b1a6223620b44 --- /dev/null +++ b/wandb/run-20220729_190139-3h19ae7o/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 8}} \ No newline at end of file diff --git a/wandb/run-20220729_190139-3h19ae7o/logs/debug-internal.log b/wandb/run-20220729_190139-3h19ae7o/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..70f5454fc7a56b910ebe3cd1e860e3f4857833a0 --- /dev/null +++ b/wandb/run-20220729_190139-3h19ae7o/logs/debug-internal.log @@ -0,0 +1,148 @@ +2022-07-29 19:01:40,118 INFO MainThread:171545 [internal.py:wandb_internal():87] W&B internal server running at pid: 171545, started at: 2022-07-29 19:01:40.117863 +2022-07-29 19:01:40,120 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: check_version +2022-07-29 19:01:40,120 INFO WriterThread:171545 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/run-3h19ae7o.wandb +2022-07-29 19:01:40,121 DEBUG SenderThread:171545 [sender.py:send():234] send: header +2022-07-29 19:01:40,121 DEBUG SenderThread:171545 
[sender.py:send_request():248] send_request: check_version +2022-07-29 19:01:40,160 DEBUG SenderThread:171545 [sender.py:send():234] send: run +2022-07-29 19:01:40,329 INFO SenderThread:171545 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files +2022-07-29 19:01:40,329 INFO SenderThread:171545 [sender.py:_start_run_threads():804] run started: 3h19ae7o with start time 1659121299 +2022-07-29 19:01:40,329 DEBUG SenderThread:171545 [sender.py:send():234] send: summary +2022-07-29 19:01:40,329 INFO SenderThread:171545 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 19:01:40,330 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: run_start +2022-07-29 19:01:41,331 INFO Thread-8 :171545 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/wandb-summary.json +2022-07-29 19:01:42,561 DEBUG HandlerThread:171545 [meta.py:__init__():40] meta init +2022-07-29 19:01:42,561 DEBUG HandlerThread:171545 [meta.py:__init__():54] meta init done +2022-07-29 19:01:42,561 DEBUG HandlerThread:171545 [meta.py:probe():214] probe +2022-07-29 19:01:42,563 DEBUG HandlerThread:171545 [meta.py:_setup_git():204] setup git +2022-07-29 19:01:42,595 DEBUG HandlerThread:171545 [meta.py:_setup_git():211] setup git done +2022-07-29 19:01:42,595 DEBUG HandlerThread:171545 [meta.py:_save_code():92] save code +2022-07-29 19:01:42,606 DEBUG HandlerThread:171545 [meta.py:_save_code():113] save code done +2022-07-29 19:01:42,607 DEBUG HandlerThread:171545 [meta.py:_save_patches():130] save patches +2022-07-29 19:01:42,661 DEBUG HandlerThread:171545 [meta.py:_save_patches():172] save patches done +2022-07-29 19:01:42,661 DEBUG HandlerThread:171545 [meta.py:_save_pip():58] save pip +2022-07-29 19:01:42,662 DEBUG HandlerThread:171545 [meta.py:_save_pip():72] save pip done +2022-07-29 19:01:42,662 DEBUG 
HandlerThread:171545 [meta.py:probe():252] probe done +2022-07-29 19:01:42,665 DEBUG SenderThread:171545 [sender.py:send():234] send: files +2022-07-29 19:01:42,665 INFO SenderThread:171545 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-29 19:01:42,666 INFO SenderThread:171545 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-29 19:01:42,671 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:01:42,672 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:01:43,146 INFO Thread-11 :171545 [upload_job.py:push():137] Uploaded file /tmp/tmpw0fdeimbwandb/1v9fa8k6-wandb-metadata.json +2022-07-29 19:01:43,334 INFO Thread-8 :171545 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/output.log +2022-07-29 19:01:43,334 INFO Thread-8 :171545 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/requirements.txt +2022-07-29 19:01:43,334 INFO Thread-8 :171545 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/code/run_flax_speech_recognition_ctc.py +2022-07-29 19:01:43,334 INFO Thread-8 :171545 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/wandb-metadata.json +2022-07-29 19:01:43,334 INFO Thread-8 :171545 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/code +2022-07-29 19:01:43,355 INFO Thread-12 :171545 [upload_job.py:push():137] Uploaded file /tmp/tmpw0fdeimbwandb/2vqa3jvn-code/run_flax_speech_recognition_ctc.py +2022-07-29 19:01:45,335 INFO Thread-8 :171545 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/output.log +2022-07-29 19:01:47,335 INFO Thread-8 :171545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/output.log +2022-07-29 19:01:49,207 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:01:49,336 INFO Thread-8 :171545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/output.log +2022-07-29 19:01:49,737 DEBUG SenderThread:171545 [sender.py:send():234] send: telemetry +2022-07-29 19:01:49,737 DEBUG SenderThread:171545 [sender.py:send():234] send: exit +2022-07-29 19:01:49,739 INFO SenderThread:171545 [sender.py:send_exit():366] handling exit code: 1 +2022-07-29 19:01:49,739 INFO SenderThread:171545 [sender.py:send_exit():368] handling runtime: 8 +2022-07-29 19:01:49,740 INFO SenderThread:171545 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 19:01:49,740 INFO SenderThread:171545 [sender.py:send_exit():374] send defer +2022-07-29 19:01:49,740 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:01:49,741 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:01:49,741 INFO HandlerThread:171545 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-29 19:01:49,741 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: defer +2022-07-29 19:01:49,741 INFO SenderThread:171545 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-29 19:01:49,741 INFO SenderThread:171545 [sender.py:transition_state():387] send defer: 1 +2022-07-29 19:01:49,741 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:01:49,742 INFO HandlerThread:171545 [handler.py:handle_request_defer():147] handle 
defer: 1 +2022-07-29 19:01:49,792 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: defer +2022-07-29 19:01:49,793 INFO SenderThread:171545 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-29 19:01:49,793 INFO SenderThread:171545 [sender.py:transition_state():387] send defer: 2 +2022-07-29 19:01:49,793 DEBUG SenderThread:171545 [sender.py:send():234] send: stats +2022-07-29 19:01:49,793 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:01:49,793 INFO HandlerThread:171545 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-29 19:01:49,793 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: defer +2022-07-29 19:01:49,793 INFO SenderThread:171545 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-29 19:01:49,793 INFO SenderThread:171545 [sender.py:transition_state():387] send defer: 3 +2022-07-29 19:01:49,794 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:01:49,794 INFO HandlerThread:171545 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-29 19:01:49,794 DEBUG SenderThread:171545 [sender.py:send():234] send: summary +2022-07-29 19:01:49,794 INFO SenderThread:171545 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 19:01:49,794 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: defer +2022-07-29 19:01:49,794 INFO SenderThread:171545 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-29 19:01:49,794 INFO SenderThread:171545 [sender.py:transition_state():387] send defer: 4 +2022-07-29 19:01:49,795 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:01:49,795 INFO HandlerThread:171545 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-29 19:01:49,795 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: defer 
+2022-07-29 19:01:49,795 INFO SenderThread:171545 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-29 19:01:49,842 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:01:49,961 INFO SenderThread:171545 [sender.py:transition_state():387] send defer: 5 +2022-07-29 19:01:49,961 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:01:49,962 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:01:49,962 INFO HandlerThread:171545 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-29 19:01:49,962 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: defer +2022-07-29 19:01:49,962 INFO SenderThread:171545 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-29 19:01:49,962 INFO SenderThread:171545 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-29 19:01:50,063 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:01:50,337 INFO Thread-8 :171545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/wandb-summary.json +2022-07-29 19:01:50,337 INFO SenderThread:171545 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/config.yaml +2022-07-29 19:01:50,338 INFO SenderThread:171545 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files +2022-07-29 19:01:50,338 INFO SenderThread:171545 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/config.yaml config.yaml +2022-07-29 19:01:50,338 INFO SenderThread:171545 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/requirements.txt requirements.txt 
+2022-07-29 19:01:50,338 INFO SenderThread:171545 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/output.log output.log +2022-07-29 19:01:50,338 INFO SenderThread:171545 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/wandb-summary.json wandb-summary.json +2022-07-29 19:01:50,338 INFO SenderThread:171545 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/wandb-metadata.json wandb-metadata.json +2022-07-29 19:01:50,339 INFO SenderThread:171545 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-29 19:01:50,339 INFO SenderThread:171545 [sender.py:transition_state():387] send defer: 6 +2022-07-29 19:01:50,339 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:01:50,344 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:01:50,344 INFO HandlerThread:171545 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-29 19:01:50,345 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: defer +2022-07-29 19:01:50,345 INFO SenderThread:171545 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-29 19:01:50,345 INFO SenderThread:171545 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 19:01:50,446 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:01:50,446 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:01:50,548 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:01:50,548 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: poll_exit 
+2022-07-29 19:01:50,650 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:01:50,650 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:01:50,752 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:01:50,752 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:01:50,829 INFO Thread-13 :171545 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/config.yaml +2022-07-29 19:01:50,839 INFO Thread-15 :171545 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/output.log +2022-07-29 19:01:50,854 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:01:50,854 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:01:50,862 INFO Thread-16 :171545 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/wandb-summary.json +2022-07-29 19:01:50,877 INFO Thread-14 :171545 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/files/requirements.txt +2022-07-29 19:01:50,955 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:01:50,956 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:01:51,057 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:01:51,058 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:01:51,078 INFO Thread-7 :171545 [sender.py:transition_state():387] send defer: 7 +2022-07-29 19:01:51,078 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: defer +2022-07-29 
19:01:51,078 INFO HandlerThread:171545 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-29 19:01:51,078 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: defer +2022-07-29 19:01:51,078 INFO SenderThread:171545 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-29 19:01:51,159 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:01:51,533 INFO SenderThread:171545 [sender.py:transition_state():387] send defer: 8 +2022-07-29 19:01:51,533 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:01:51,534 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:01:51,534 INFO HandlerThread:171545 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-29 19:01:51,534 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: defer +2022-07-29 19:01:51,534 INFO SenderThread:171545 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-29 19:01:51,534 INFO SenderThread:171545 [sender.py:transition_state():387] send defer: 9 +2022-07-29 19:01:51,535 DEBUG SenderThread:171545 [sender.py:send():234] send: final +2022-07-29 19:01:51,535 DEBUG SenderThread:171545 [sender.py:send():234] send: footer +2022-07-29 19:01:51,535 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:01:51,535 INFO HandlerThread:171545 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-29 19:01:51,535 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: defer +2022-07-29 19:01:51,536 INFO SenderThread:171545 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-29 19:01:51,635 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:01:51,636 DEBUG SenderThread:171545 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:01:51,636 INFO 
SenderThread:171545 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 19:01:51,944 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: get_summary +2022-07-29 19:01:51,945 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-29 19:01:51,946 DEBUG HandlerThread:171545 [handler.py:handle_request():130] handle_request: shutdown +2022-07-29 19:01:51,946 INFO HandlerThread:171545 [handler.py:finish():731] shutting down handler +2022-07-29 19:01:52,536 INFO WriterThread:171545 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/run-3h19ae7o.wandb +2022-07-29 19:01:52,943 INFO SenderThread:171545 [sender.py:finish():1070] shutting down sender +2022-07-29 19:01:52,943 INFO SenderThread:171545 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 19:01:52,943 INFO SenderThread:171545 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 19:01:52,945 INFO MainThread:171545 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220729_190139-3h19ae7o/logs/debug.log b/wandb/run-20220729_190139-3h19ae7o/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..6c6e24bdd427c5deafb542d5aaee9fb2c9cb530b --- /dev/null +++ b/wandb/run-20220729_190139-3h19ae7o/logs/debug.log @@ -0,0 +1,139 @@ +2022-07-29 19:01:39,256 INFO MainThread:170288 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-29 19:01:39,256 INFO MainThread:170288 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-29 19:01:39,256 INFO MainThread:170288 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/logs/debug.log +2022-07-29 19:01:39,256 INFO MainThread:170288 [wandb_init.py:_log_setup():372] Logging internal logs to 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190139-3h19ae7o/logs/debug-internal.log +2022-07-29 19:01:39,256 INFO MainThread:170288 [wandb_init.py:init():404] calling init triggers +2022-07-29 19:01:39,256 INFO MainThread:170288 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-29 19:01:39,256 INFO MainThread:170288 [wandb_init.py:init():460] starting backend +2022-07-29 19:01:39,256 INFO MainThread:170288 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-29 19:01:39,285 INFO MainThread:170288 [backend.py:ensure_launched():216] starting backend process... +2022-07-29 19:01:39,312 INFO MainThread:170288 [backend.py:ensure_launched():221] started backend process with pid: 171545 +2022-07-29 19:01:39,314 INFO MainThread:170288 [wandb_init.py:init():469] backend started and connected +2022-07-29 19:01:39,327 INFO MainThread:170288 [wandb_init.py:init():533] updated telemetry +2022-07-29 19:01:39,390 INFO MainThread:170288 [wandb_init.py:init():563] communicating current version +2022-07-29 19:01:40,159 INFO MainThread:170288 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-29 19:01:40,159 INFO MainThread:170288 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-29 19:01:40,330 INFO MainThread:170288 [wandb_init.py:init():606] starting run threads in backend +2022-07-29 19:01:42,671 INFO MainThread:170288 [wandb_run.py:_console_start():1810] atexit reg +2022-07-29 19:01:42,671 INFO MainThread:170288 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-29 19:01:42,672 INFO MainThread:170288 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-29 19:01:42,674 INFO MainThread:170288 [wandb_run.py:_redirect():1745] Redirects installed. 
+2022-07-29 19:01:42,674 INFO MainThread:170288 [wandb_init.py:init():633] run started, returning control to user process +2022-07-29 19:01:47,172 INFO MainThread:170288 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-29 19:01:47,176 INFO MainThread:170288 [wandb_run.py:_restore():1752] restore +2022-07-29 19:01:49,741 INFO MainThread:170288 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 73701 +} + +2022-07-29 19:01:49,962 INFO MainThread:170288 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 73701 +} + +2022-07-29 19:01:50,345 INFO MainThread:170288 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 85493 +} + +2022-07-29 19:01:50,447 INFO MainThread:170288 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 85493 +} + +2022-07-29 19:01:50,549 INFO MainThread:170288 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:01:50,651 INFO MainThread:170288 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:01:50,753 INFO MainThread:170288 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:01:50,855 INFO MainThread:170288 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} 
+pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:01:50,956 INFO MainThread:170288 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:01:51,058 INFO MainThread:170288 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:01:51,534 INFO MainThread:170288 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:01:51,943 INFO MainThread:170288 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} +local_info { +} + +2022-07-29 19:01:53,465 INFO MainThread:170288 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220729_190139-3h19ae7o/run-3h19ae7o.wandb b/wandb/run-20220729_190139-3h19ae7o/run-3h19ae7o.wandb new file mode 100644 index 0000000000000000000000000000000000000000..613942396072c70d70b572e40bd0ee85621fff3f --- /dev/null +++ b/wandb/run-20220729_190139-3h19ae7o/run-3h19ae7o.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d731128588ab4151c70d7ea1d808dec40df947170760cc941be5a80239eb9f9 +size 9438 diff --git a/wandb/run-20220729_190441-29m5vt4h/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220729_190441-29m5vt4h/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..934c1109c5f92a96e47636edcd49612a996dd9fe --- /dev/null +++ b/wandb/run-20220729_190441-29m5vt4h/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1596 @@ +#!/usr/bin/env python +# coding=utf-8 +# 
Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. + +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is 
not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. + """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." 
+ }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." + }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. 
Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. + """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. 
Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. 
" + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. 
" + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. " + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
    # --- tail of an enclosing length-grouping sampler (its `def` is above this chunk) ---
    # Since each megabatch is sorted by descending length, the longest element is the first
    megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches]
    max_idx = np.argmax(megabatch_maximums).item()
    # Switch to put the longest batch in first position
    # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch)
    megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0]

    # Flatten the list of megabatches back into a single 1-D index array.
    megabatches = np.array([i for megabatch in megabatches for i in megabatch])

    return megabatches


def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray:
    """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by
    the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned."""
    num_samples = len(samples_idx)
    if drop_last:
        # Trim the trailing partial batch, then reshape into (num_batches, batch_size).
        samples_to_remove = num_samples % batch_size
        if samples_to_remove != 0:
            samples_idx = samples_idx[:-samples_to_remove]
        sections_split = num_samples // batch_size
        samples_idx = samples_idx.reshape((sections_split, batch_size))
    else:
        # Keep the partial batch: np.array_split returns a list of (possibly ragged) arrays.
        sections_split = math.ceil(num_samples / batch_size)
        samples_idx = np.array_split(samples_idx, sections_split)
    return samples_idx


def write_train_metric(summary_writer, train_metrics, train_time, step):
    """Write accumulated per-step training metrics to a TensorBoard summary writer.

    `train_metrics` is a list of per-step metric dicts; `get_metrics` stacks them so
    each key maps to a sequence of values, which are logged at their original steps.
    """
    summary_writer.scalar("train_time", train_time, step)

    train_metrics = get_metrics(train_metrics)
    for key, vals in train_metrics.items():
        tag = f"train_{key}"
        for i, val in enumerate(vals):
            # `step - len(vals) + i + 1` back-dates each value to the step it was produced at.
            summary_writer.scalar(tag, val, step - len(vals) + i + 1)


def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None):
    """Write evaluation metrics (and optionally the decoded predictions) to TensorBoard."""
    for metric_name, value in eval_metrics.items():
        summary_writer.scalar(f"eval_{metric_name}", value, step)

    if pred_str is not None:
        # write output actual predictions for debugging
        summary_writer.text("eval_predictions", "\n".join(pred_str), step)


def write_wandb_log(metrics, step, prefix=None):
    """Log a metrics dict to Weights & Biases (process 0 only).

    Keys containing "layer" get a trailing "/" so wandb groups the per-layer
    norms into their own panel section; other keys are namespaced by `prefix`.
    """
    if jax.process_index() == 0:
        log_metrics = {}
        for k, v in metrics.items():
            if "layer" in k:
                log_metrics[f"{k}/"] = v
            elif prefix is not None:
                log_metrics[f"{prefix}/{k}"] = v
            else:
                log_metrics[k] = v
        wandb.log(log_metrics, step)


def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"):
    """Log the first `num_log` (label, prediction) pairs to a wandb Table (process 0 only)."""
    if jax.process_index() == 0:
        # convert str data to a wandb compatible format
        str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))]
        # we'll log the first 50 predictions for each epoch
        wandb.log(
            {
                f"{prefix}/step_{int(step / 1000)}k": wandb.Table(
                    columns=["label_str", "pred_str"], data=str_data[:num_log]
                )
            },
            step,
        )


def create_learning_rate_fn(
    num_train_steps: int, num_warmup_steps: int, learning_rate: float
) -> Callable[[int], jnp.array]:
    """Returns a linear warmup, linear_decay learning rate function."""
    warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps)
    decay_fn = optax.linear_schedule(
        init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps
    )
    # Warmup for the first `num_warmup_steps`, then decay linearly to zero.
    schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps])
    return schedule_fn


def ctc_loss(
    logits,
    logits_attention_mask,
    labels,
    blank_id,
    loss_reduction="mean",
    output_emission_dict=False,
    log_epsilon=-100000.0,
):
    """Computes CTC loss.
    This function performs forward computation over an FSA with `N * 2` states
    where `N` is the max number of labels. The states are split into two groups:
    Phi states and emission states. a phi-state accepts repetition of
    phi (blank)-symbols and transits to emission state when the correct label is
    observed. An emission state accepts repetition of the label and transits to
    the next phi states at any time (so called epsilon-transition).
    Below, `B` denotes the batch size, `T` denotes the time steps in `logits`,
    and `N` denotes the time steps in `labels`.
    Args:
      logits: (B, T, K)-array containing log-probabilities of each class.
      logitpaddings: (B, T)-array. Padding indicators for `logits`.
      labels: (B, N)-array containing reference integer labels.
      labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently,
        `labels` must be right-padded, i.e. each row of `labelpaddings` must be
        repetition of zeroes, followed by repetition of ones.
      blank_id: Id for blank token.
      loss_reduction: one of "mean", "sum", "default"
        - "none": no reduction is applied.
        - "mean": output loss will be divided by target lengths and then the
          mean over the batch is taken.
        - "sum": output loss are summed over batch
      output_emission_dict: whether to output additional information about the emission probs
    Returns:
      A pair of `(per_seq_loss, aux)`.
      per_seq_loss:
        (B,)-array containing loss values for each sequence in the batch.
      aux: Dictionary containing interim variables used for computing losses.
        aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each
          phi-state corresponding to the n-th label.
        aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each
          emission-state corresponding to the n-th label.
        aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol
          corresponding to each time frame.
        aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label
          corresponding to each time frame.
    """
    # label paddings are indicated by -100
    labelpaddings = labels < 0
    # logit paddings are the inverse of attention_mask
    logitpaddings = ~logits_attention_mask

    # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py
    batchsize, unused_maxinputlen, num_classes = logits.shape
    batchsize_, maxlabellen = labels.shape

    logprobs = jax.nn.log_softmax(logits)
    # Effective (unpadded) label length per sequence.
    labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32)

    # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1].
    repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32)
    repeat = jnp.pad(repeat, ((0, 0), (0, 1)))

    logprobs_phi = logprobs[:, :, blank_id : blank_id + 1]  # [B, T, 1]
    logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2))  # [T, B, 1]

    one_hot = jax.nn.one_hot(labels, num_classes=num_classes)  # [B, N, K]
    logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot)
    logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2))  # [T, B, N]

    # Forward variables initialised to log(0) ~= log_epsilon, except the start state.
    logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon  # [B, N]
    logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0)
    logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon  # [B, N]

    def loop_body(prev, x):
        # One time-step of the CTC forward recursion, scanned over T.
        prev_phi, prev_emit = prev
        # emit-to-phi epsilon transition, except if the next label is repetition
        prev_phi_orig = prev_phi
        prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat))

        logprob_emit, logprob_phi, pad = x

        # phi-to-emit transition
        next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit)
        # self-loop transition
        next_phi = prev_phi + logprob_phi
        # emit-to-phi blank transition only when the next label is repetition
        next_phi = next_phi.at[:, 1:].set(
            jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat))
        )

        # For padded frames (pad == 1), carry the previous state through unchanged.
        pad = pad.reshape((batchsize, 1))
        next_emit = pad * prev_emit + (1.0 - pad) * next_emit
        next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi

        return (next_phi, next_emit), (next_phi, next_emit)

    xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0)))
    _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs)

    # last row needs to be updated with the last epsilon transition
    logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1]))
    logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last)

    # extract per_seq_loss
    one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1)  # [B, N+1]
    per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot)

    if loss_reduction == "mean":
        target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1)
        loss = (per_seq_loss / target_lengths).mean()
    elif loss_reduction == "sum":
        loss = per_seq_loss.sum()
    else:
        loss = per_seq_loss

    if not output_emission_dict:
        return loss

    return loss, {
        "logalpha_phi": logalpha_phi,
        "logalpha_emit": logalpha_emit,
        "logprobs_phi": logprobs_phi,
        "logprobs_emit": logprobs_emit,
    }


def make_dataset(seed=42):
    """Build the combined NST + NPSC Norwegian ASR dataset.

    Normalises transcripts, filters out too-short/unsuitable samples, splits NST
    with the same train/validation ratio as NPSC, and interleaves the two corpora
    weighted by their sizes. Returns a `datasets.DatasetDict` with "train",
    "validation" and "test" splits containing only "text" and "audio" columns.

    NOTE(review): several `re.sub('', ...)` patterns below are empty strings —
    the original angle-bracket special tokens (e.g. hesitation tags) appear to
    have been stripped by diff/HTML tooling; confirm against the original file.
    """
    # Pre-processing dataset
    import re

    def map_nst(entry):
        # Lowercase and fold accented characters to the Norwegian alphabet.
        text = entry["text"].lower()
        text = text.replace("(...vær stille under dette opptaket...)", "")
        text = re.sub('[áàâ]', 'a', text)
        text = re.sub('[ä]', 'æ', text)
        text = re.sub('[éèëê]', 'e', text)
        text = re.sub('[íìïî]', 'i', text)
        text = re.sub('[óòöô]', 'o', text)
        text = re.sub('[ö]', 'ø', text)
        text = re.sub('[ç]', 'c', text)
        text = re.sub('[úùüû]', 'u', text)
        # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text)
        text = re.sub('\s+', ' ', text)
        return {"text": text}

    def filter_nst(entry):
        # Require at least ~1 label char per 320 audio samples and 3+ chars of text.
        if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)):
            return False  # Too short
        # NOTE(review): re.match(pattern, string) — the arguments here look swapped;
        # this matches the *type field as a pattern* against the literal "pIW|CA".
        # Presumably it should be re.match("pIW|CA", entry["type"]). Confirm intent.
        if re.match(entry["type"], "pIW|CA"):
            return False  # Spelling out words
        return True

    def filter_npsc(entry):
        # False if there are digits in the text
        if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)):
            return False  # Too short
        if re.search("\d", entry["text"]):
            return False
        return True

    def map_npsc(entry):
        # Same accent folding as NST, plus replacement of special hesitation/noise tokens.
        batch = {"text": entry["text"].lower()}
        batch["text"] = re.sub('[áàâ]', 'a', batch["text"])
        batch["text"] = re.sub('[ä]', 'æ', batch["text"])
        batch["text"] = re.sub('[éèëê]', 'e', batch["text"])
        batch["text"] = re.sub('[íìïî]', 'i', batch["text"])
        batch["text"] = re.sub('[óòöô]', 'o', batch["text"])
        batch["text"] = re.sub('[ö]', 'ø', batch["text"])
        batch["text"] = re.sub('[ç]', 'c', batch["text"])
        batch["text"] = re.sub('[úùüû]', 'u', batch["text"])
        batch["text"] = re.sub('\s', ' ', batch["text"])
        # NOTE(review): the empty first arguments below are almost certainly stripped
        # angle-bracket tokens (e.g. <ee>, <qq>, <mm>, <inaudible>); as written these
        # empty patterns would insert the replacement between every character.
        batch["text"] = re.sub('', 'eee', batch["text"])
        batch["text"] = re.sub('', 'qqq', batch["text"])
        batch["text"] = re.sub('', 'mmm', batch["text"])
        batch["text"] = re.sub('', 'xxx', batch["text"])
        # batch["text"] = re.sub('', '?', batch["text"])
        if "<" in batch["text"]:
            # Any remaining angle bracket means an unhandled special token slipped through.
            raise ValueError(batch["text"])
        return batch

    nst = datasets.load_dataset("NbAiLab/NST", "no-close")
    npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3")
    # TODO NST_hesitate

    split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"]))  # Use same train/val ratio as NPSC
    nst_train = nst["train"].train_test_split(train_size=split, seed=seed)
    nst["train"] = nst_train["train"]
    nst["validation"] = nst_train["test"]

    nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed)
    npsc = npsc.filter(filter_npsc).map(map_npsc).shuffle(seed=seed)

    # Keep only the columns shared by both corpora so they can be interleaved.
    npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", "audio"]])
    nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]])

    combined = {}
    # NOTE(review): `split` (the float ratio above) is shadowed by the loop variable here.
    for split in "train", "validation", "test":
        probs = np.array([len(nst_base[split]), len(npsc_base[split])])  # Weight by number of examples
        probs = (probs / probs.sum()).tolist()
        comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed)
        combined[split] = comb

    return datasets.DatasetDict(**combined)


def main():
    # 1. Parse input arguments
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.
    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments))

    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    # 2. Setup logging
    # Make one log on every process with the configuration for debugging.
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )
    # Set the verbosity to info of the Transformers logger.
    # We only want one process per machine to log things on the screen.
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise ValueError( 
+ "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_190441-29m5vt4h/files/config.yaml b/wandb/run-20220729_190441-29m5vt4h/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..538fbcf8db2bff202565b523a13c42281ec62c2c --- /dev/null +++ b/wandb/run-20220729_190441-29m5vt4h/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659121481 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_190441-29m5vt4h/files/output.log b/wandb/run-20220729_190441-29m5vt4h/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..b8772249f7609f1f06768913439ff58ee5f273c2 --- /dev/null +++ b/wandb/run-20220729_190441-29m5vt4h/files/output.log @@ -0,0 +1,158 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_19-04-37_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=32, +per_device_train_batch_size=32, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, 
+save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 83.41it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 468.45it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow + 0%| | 0/256 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 870, in main + raw_datasets = make_dataset(seed=training_args.seed) + File "run_flax_speech_recognition_ctc.py", line 811, in make_dataset + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + File 
"/data/flax/lib/python3.8/site-packages/datasets/dataset_dict.py", line 865, in filter + { + File "/data/flax/lib/python3.8/site-packages/datasets/dataset_dict.py", line 866, in + k: dataset.filter( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 524, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/fingerprint.py", line 480, in wrapper + out = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2904, in filter + indices = self.map( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2387, in map + return self._map_single( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 557, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 524, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/fingerprint.py", line 480, in wrapper + out = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2775, in _map_single + batch = apply_function_on_filtered_inputs( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2655, in apply_function_on_filtered_inputs + processed_inputs = function(*fn_args, *additional_args, **fn_kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2347, in decorated + result = f(decorated_item, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4933, in get_indices_from_mask_function + example = {key: batch[key][i] for key in batch} + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4933, in + example = {key: batch[key][i] for key in batch} + File 
"/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 133, in __getitem__ + values = [ + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 134, in + decode_nested_example(self.features[key], value) if value is not None else None for value in values + File "/data/flax/lib/python3.8/site-packages/datasets/features/features.py", line 1260, in decode_nested_example + return schema.decode_example(obj, token_per_repo_id=token_per_repo_id) if obj is not None else None + File "/data/flax/lib/python3.8/site-packages/datasets/features/audio.py", line 144, in decode_example + array, sampling_rate = self._decode_mp3(file if file else path) + File "/data/flax/lib/python3.8/site-packages/datasets/features/audio.py", line 293, in _decode_mp3 + array, sampling_rate = torchaudio.load(path_or_file, format="mp3") + File "/data/flax/lib/python3.8/site-packages/torchaudio/backend/sox_io_backend.py", line 214, in load + return _fallback_load_fileobj(filepath, frame_offset, num_frames, normalize, channels_first, format) +NameError: name '_fallback_load_fileobj' is not defined \ No newline at end of file diff --git a/wandb/run-20220729_190441-29m5vt4h/files/requirements.txt b/wandb/run-20220729_190441-29m5vt4h/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..f28978ffa0f0a8356a51ec97122622a6097b70e8 --- /dev/null +++ b/wandb/run-20220729_190441-29m5vt4h/files/requirements.txt @@ -0,0 +1,151 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 
+etils==0.6.0 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pygments==2.11.1 +pyparsing==3.0.6 +python-dateutil==2.8.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 
+threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0 +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220729_190441-29m5vt4h/files/wandb-metadata.json b/wandb/run-20220729_190441-29m5vt4h/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..922996f7b39924b8a9d6e7dbb0ffdb069ca569aa --- /dev/null +++ b/wandb/run-20220729_190441-29m5vt4h/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-29T19:04:44.947822", + "startedAt": "2022-07-29T19:04:41.750886", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=NbAiLab/nb-wav2vec2-1b-bokmaal", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=32", + "--per_device_eval_batch_size=32", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + 
"--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220729_190441-29m5vt4h/files/wandb-summary.json b/wandb/run-20220729_190441-29m5vt4h/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..a594e25ada167c5ad54a828e8a5b1a6223620b44 --- /dev/null +++ b/wandb/run-20220729_190441-29m5vt4h/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 8}} \ No newline at end of file diff --git a/wandb/run-20220729_190441-29m5vt4h/logs/debug-internal.log b/wandb/run-20220729_190441-29m5vt4h/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..4fbe4426a20a69a5ad22e1918dfe03cc516b30ea --- /dev/null +++ b/wandb/run-20220729_190441-29m5vt4h/logs/debug-internal.log @@ -0,0 +1,170 @@ +2022-07-29 19:04:42,573 INFO MainThread:176849 [internal.py:wandb_internal():87] W&B internal server running at pid: 176849, started at: 2022-07-29 19:04:42.573053 +2022-07-29 19:04:42,575 INFO WriterThread:176849 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/run-29m5vt4h.wandb +2022-07-29 19:04:42,575 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: check_version +2022-07-29 19:04:42,576 DEBUG SenderThread:176849 [sender.py:send():234] send: header +2022-07-29 19:04:42,576 DEBUG SenderThread:176849 
[sender.py:send_request():248] send_request: check_version +2022-07-29 19:04:42,610 DEBUG SenderThread:176849 [sender.py:send():234] send: run +2022-07-29 19:04:42,771 INFO SenderThread:176849 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files +2022-07-29 19:04:42,771 INFO SenderThread:176849 [sender.py:_start_run_threads():804] run started: 29m5vt4h with start time 1659121481 +2022-07-29 19:04:42,772 DEBUG SenderThread:176849 [sender.py:send():234] send: summary +2022-07-29 19:04:42,772 INFO SenderThread:176849 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 19:04:42,773 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: run_start +2022-07-29 19:04:43,773 INFO Thread-8 :176849 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/wandb-summary.json +2022-07-29 19:04:44,947 DEBUG HandlerThread:176849 [meta.py:__init__():40] meta init +2022-07-29 19:04:44,947 DEBUG HandlerThread:176849 [meta.py:__init__():54] meta init done +2022-07-29 19:04:44,947 DEBUG HandlerThread:176849 [meta.py:probe():214] probe +2022-07-29 19:04:44,948 DEBUG HandlerThread:176849 [meta.py:_setup_git():204] setup git +2022-07-29 19:04:44,978 DEBUG HandlerThread:176849 [meta.py:_setup_git():211] setup git done +2022-07-29 19:04:44,978 DEBUG HandlerThread:176849 [meta.py:_save_code():92] save code +2022-07-29 19:04:44,989 DEBUG HandlerThread:176849 [meta.py:_save_code():113] save code done +2022-07-29 19:04:44,989 DEBUG HandlerThread:176849 [meta.py:_save_patches():130] save patches +2022-07-29 19:04:45,043 DEBUG HandlerThread:176849 [meta.py:_save_patches():172] save patches done +2022-07-29 19:04:45,044 DEBUG HandlerThread:176849 [meta.py:_save_pip():58] save pip +2022-07-29 19:04:45,044 DEBUG HandlerThread:176849 [meta.py:_save_pip():72] save pip done +2022-07-29 19:04:45,044 DEBUG 
HandlerThread:176849 [meta.py:probe():252] probe done +2022-07-29 19:04:45,047 DEBUG SenderThread:176849 [sender.py:send():234] send: files +2022-07-29 19:04:45,047 INFO SenderThread:176849 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-29 19:04:45,047 INFO SenderThread:176849 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-29 19:04:45,053 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:04:45,053 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:04:45,512 INFO Thread-11 :176849 [upload_job.py:push():137] Uploaded file /tmp/tmp8vey1y7dwandb/10dh41rh-wandb-metadata.json +2022-07-29 19:04:45,767 INFO Thread-12 :176849 [upload_job.py:push():137] Uploaded file /tmp/tmp8vey1y7dwandb/31d5qx94-code/run_flax_speech_recognition_ctc.py +2022-07-29 19:04:45,793 INFO Thread-8 :176849 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/requirements.txt +2022-07-29 19:04:45,793 INFO Thread-8 :176849 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/output.log +2022-07-29 19:04:45,793 INFO Thread-8 :176849 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/code/run_flax_speech_recognition_ctc.py +2022-07-29 19:04:45,793 INFO Thread-8 :176849 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/wandb-metadata.json +2022-07-29 19:04:45,793 INFO Thread-8 :176849 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/code +2022-07-29 19:04:47,794 INFO Thread-8 :176849 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/output.log +2022-07-29 19:04:49,794 INFO Thread-8 :176849 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/output.log +2022-07-29 19:04:51,589 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:51,589 DEBUG SenderThread:176849 [sender.py:send():234] send: telemetry +2022-07-29 19:04:51,589 DEBUG SenderThread:176849 [sender.py:send():234] send: exit +2022-07-29 19:04:51,589 INFO SenderThread:176849 [sender.py:send_exit():366] handling exit code: 1 +2022-07-29 19:04:51,589 INFO SenderThread:176849 [sender.py:send_exit():368] handling runtime: 8 +2022-07-29 19:04:51,590 INFO SenderThread:176849 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 19:04:51,590 INFO SenderThread:176849 [sender.py:send_exit():374] send defer +2022-07-29 19:04:51,590 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:51,591 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:04:51,591 INFO HandlerThread:176849 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-29 19:04:51,591 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: defer +2022-07-29 19:04:51,591 INFO SenderThread:176849 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-29 19:04:51,591 INFO SenderThread:176849 [sender.py:transition_state():387] send defer: 1 +2022-07-29 19:04:51,591 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:04:51,591 INFO HandlerThread:176849 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-29 19:04:51,676 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: defer +2022-07-29 19:04:51,676 INFO SenderThread:176849 
[sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-29 19:04:51,676 INFO SenderThread:176849 [sender.py:transition_state():387] send defer: 2 +2022-07-29 19:04:51,676 DEBUG SenderThread:176849 [sender.py:send():234] send: stats +2022-07-29 19:04:51,677 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:04:51,677 INFO HandlerThread:176849 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-29 19:04:51,677 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: defer +2022-07-29 19:04:51,677 INFO SenderThread:176849 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-29 19:04:51,677 INFO SenderThread:176849 [sender.py:transition_state():387] send defer: 3 +2022-07-29 19:04:51,677 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:04:51,677 INFO HandlerThread:176849 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-29 19:04:51,677 DEBUG SenderThread:176849 [sender.py:send():234] send: summary +2022-07-29 19:04:51,678 INFO SenderThread:176849 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 19:04:51,678 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: defer +2022-07-29 19:04:51,678 INFO SenderThread:176849 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-29 19:04:51,678 INFO SenderThread:176849 [sender.py:transition_state():387] send defer: 4 +2022-07-29 19:04:51,678 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:04:51,678 INFO HandlerThread:176849 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-29 19:04:51,678 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: defer +2022-07-29 19:04:51,678 INFO SenderThread:176849 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-29 19:04:51,692 DEBUG HandlerThread:176849 
[handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:51,795 INFO Thread-8 :176849 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/output.log +2022-07-29 19:04:51,795 INFO Thread-8 :176849 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/wandb-summary.json +2022-07-29 19:04:51,839 INFO SenderThread:176849 [sender.py:transition_state():387] send defer: 5 +2022-07-29 19:04:51,839 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:51,839 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:04:51,840 INFO HandlerThread:176849 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-29 19:04:51,840 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: defer +2022-07-29 19:04:51,840 INFO SenderThread:176849 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-29 19:04:51,840 INFO SenderThread:176849 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-29 19:04:51,940 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:52,796 INFO Thread-8 :176849 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/config.yaml +2022-07-29 19:04:52,796 INFO SenderThread:176849 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files +2022-07-29 19:04:52,796 INFO SenderThread:176849 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/config.yaml config.yaml +2022-07-29 19:04:52,796 INFO SenderThread:176849 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/requirements.txt 
requirements.txt +2022-07-29 19:04:52,797 INFO SenderThread:176849 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/output.log output.log +2022-07-29 19:04:52,797 INFO SenderThread:176849 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/wandb-summary.json wandb-summary.json +2022-07-29 19:04:52,797 INFO SenderThread:176849 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/wandb-metadata.json wandb-metadata.json +2022-07-29 19:04:52,802 INFO SenderThread:176849 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-29 19:04:52,802 INFO SenderThread:176849 [sender.py:transition_state():387] send defer: 6 +2022-07-29 19:04:52,802 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:52,804 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:04:52,804 INFO HandlerThread:176849 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-29 19:04:52,808 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: defer +2022-07-29 19:04:52,808 INFO SenderThread:176849 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-29 19:04:52,808 INFO SenderThread:176849 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 19:04:52,904 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:52,905 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:53,006 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:53,006 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: 
poll_exit +2022-07-29 19:04:53,108 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:53,108 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:53,209 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:53,209 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:53,259 INFO Thread-15 :176849 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/output.log +2022-07-29 19:04:53,272 INFO Thread-16 :176849 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/wandb-summary.json +2022-07-29 19:04:53,276 INFO Thread-13 :176849 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/config.yaml +2022-07-29 19:04:53,311 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:53,311 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:53,412 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:53,412 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:53,514 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:53,514 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:53,615 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:53,616 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:53,717 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:53,717 DEBUG SenderThread:176849 
[sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:53,818 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:53,819 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:53,920 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:53,920 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:54,021 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:54,021 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:54,123 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:54,123 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:54,224 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:54,225 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:54,326 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:54,326 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:54,428 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:54,428 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:54,455 INFO Thread-14 :176849 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/files/requirements.txt +2022-07-29 19:04:54,529 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:54,529 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:54,631 
DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:54,631 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:54,655 INFO Thread-7 :176849 [sender.py:transition_state():387] send defer: 7 +2022-07-29 19:04:54,656 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:04:54,656 INFO HandlerThread:176849 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-29 19:04:54,656 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: defer +2022-07-29 19:04:54,656 INFO SenderThread:176849 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-29 19:04:54,732 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:54,792 INFO SenderThread:176849 [sender.py:transition_state():387] send defer: 8 +2022-07-29 19:04:54,792 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:54,792 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:04:54,792 INFO HandlerThread:176849 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-29 19:04:54,792 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: defer +2022-07-29 19:04:54,793 INFO SenderThread:176849 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-29 19:04:54,793 INFO SenderThread:176849 [sender.py:transition_state():387] send defer: 9 +2022-07-29 19:04:54,793 DEBUG SenderThread:176849 [sender.py:send():234] send: final +2022-07-29 19:04:54,793 DEBUG SenderThread:176849 [sender.py:send():234] send: footer +2022-07-29 19:04:54,793 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:04:54,793 INFO HandlerThread:176849 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-29 19:04:54,793 DEBUG SenderThread:176849 
[sender.py:send_request():248] send_request: defer +2022-07-29 19:04:54,793 INFO SenderThread:176849 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-29 19:04:54,893 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:04:54,893 DEBUG SenderThread:176849 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:04:54,893 INFO SenderThread:176849 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 19:04:55,167 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: get_summary +2022-07-29 19:04:55,168 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-29 19:04:55,168 DEBUG HandlerThread:176849 [handler.py:handle_request():130] handle_request: shutdown +2022-07-29 19:04:55,168 INFO HandlerThread:176849 [handler.py:finish():731] shutting down handler +2022-07-29 19:04:55,794 INFO WriterThread:176849 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/run-29m5vt4h.wandb +2022-07-29 19:04:56,166 INFO SenderThread:176849 [sender.py:finish():1070] shutting down sender +2022-07-29 19:04:56,166 INFO SenderThread:176849 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 19:04:56,167 INFO SenderThread:176849 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 19:04:56,169 INFO MainThread:176849 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220729_190441-29m5vt4h/logs/debug.log b/wandb/run-20220729_190441-29m5vt4h/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..41baa6d8dbc1c310a3d6449dd7be2baa482e3b76 --- /dev/null +++ b/wandb/run-20220729_190441-29m5vt4h/logs/debug.log @@ -0,0 +1,238 @@ +2022-07-29 19:04:41,752 INFO MainThread:175598 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-29 19:04:41,752 INFO MainThread:175598 
[wandb_setup.py:_flush():71] setting login settings: {} +2022-07-29 19:04:41,752 INFO MainThread:175598 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/logs/debug.log +2022-07-29 19:04:41,752 INFO MainThread:175598 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190441-29m5vt4h/logs/debug-internal.log +2022-07-29 19:04:41,752 INFO MainThread:175598 [wandb_init.py:init():404] calling init triggers +2022-07-29 19:04:41,752 INFO MainThread:175598 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-29 19:04:41,752 INFO MainThread:175598 [wandb_init.py:init():460] starting backend +2022-07-29 19:04:41,752 INFO MainThread:175598 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-29 19:04:41,779 INFO MainThread:175598 [backend.py:ensure_launched():216] starting backend process... +2022-07-29 19:04:41,803 INFO MainThread:175598 [backend.py:ensure_launched():221] started backend process with pid: 176849 +2022-07-29 19:04:41,805 INFO MainThread:175598 [wandb_init.py:init():469] backend started and connected +2022-07-29 19:04:41,818 INFO MainThread:175598 [wandb_init.py:init():533] updated telemetry +2022-07-29 19:04:41,879 INFO MainThread:175598 [wandb_init.py:init():563] communicating current version +2022-07-29 19:04:42,609 INFO MainThread:175598 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! 
To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-29 19:04:42,609 INFO MainThread:175598 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-29 19:04:42,773 INFO MainThread:175598 [wandb_init.py:init():606] starting run threads in backend +2022-07-29 19:04:45,051 INFO MainThread:175598 [wandb_run.py:_console_start():1810] atexit reg +2022-07-29 19:04:45,051 INFO MainThread:175598 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-29 19:04:45,052 INFO MainThread:175598 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-29 19:04:45,054 INFO MainThread:175598 [wandb_run.py:_redirect():1745] Redirects installed. +2022-07-29 19:04:45,054 INFO MainThread:175598 [wandb_init.py:init():633] run started, returning control to user process +2022-07-29 19:04:49,520 INFO MainThread:175598 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-29 19:04:49,524 INFO MainThread:175598 [wandb_run.py:_restore():1752] restore +2022-07-29 19:04:51,591 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 73701 +} + +2022-07-29 19:04:51,840 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 73701 +} + +2022-07-29 19:04:52,803 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 85493 +} + +2022-07-29 19:04:52,905 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 85493 +} + +2022-07-29 19:04:53,007 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: 
file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 82799 + total_bytes: 85493 +} + +2022-07-29 19:04:53,108 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 82799 + total_bytes: 85493 +} + +2022-07-29 19:04:53,210 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 82799 + total_bytes: 85493 +} + +2022-07-29 19:04:53,311 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 82799 + total_bytes: 85493 +} + +2022-07-29 19:04:53,413 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 82799 + total_bytes: 85493 +} + +2022-07-29 19:04:53,515 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 82799 + total_bytes: 85493 +} + +2022-07-29 19:04:53,616 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 82799 + total_bytes: 85493 +} + +2022-07-29 19:04:53,718 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 82799 + total_bytes: 85493 +} + +2022-07-29 19:04:53,819 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 82799 + total_bytes: 85493 +} + +2022-07-29 19:04:53,921 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + 
other_count: 1 +} +pusher_stats { + uploaded_bytes: 82799 + total_bytes: 85493 +} + +2022-07-29 19:04:54,022 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 82799 + total_bytes: 85493 +} + +2022-07-29 19:04:54,124 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 82799 + total_bytes: 85493 +} + +2022-07-29 19:04:54,225 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:04:54,327 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:04:54,428 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:04:54,530 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:04:54,631 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:04:54,792 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} + +2022-07-29 19:04:55,167 INFO MainThread:175598 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 + 
other_count: 1 +} +pusher_stats { + uploaded_bytes: 85493 + total_bytes: 85493 +} +local_info { +} + +2022-07-29 19:04:56,631 INFO MainThread:175598 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220729_190441-29m5vt4h/run-29m5vt4h.wandb b/wandb/run-20220729_190441-29m5vt4h/run-29m5vt4h.wandb new file mode 100644 index 0000000000000000000000000000000000000000..7481eac19a668d04a92b8cff4911d29fe22c8dbc --- /dev/null +++ b/wandb/run-20220729_190441-29m5vt4h/run-29m5vt4h.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c08406c1386cc17829bf56a85d38f767a77c48664b70f890e428b1b550ee7ef7 +size 9434 diff --git a/wandb/run-20220729_190622-1kplw9z9/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220729_190622-1kplw9z9/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..934c1109c5f92a96e47636edcd49612a996dd9fe --- /dev/null +++ b/wandb/run-20220729_190622-1kplw9z9/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1596 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + return False # 
Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map(map_npsc).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", "audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", 
"test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise ValueError( 
+ "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_190622-1kplw9z9/files/config.yaml b/wandb/run-20220729_190622-1kplw9z9/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cfdbfc2ca4b438beb573c675e79bdc0bafd680cd --- /dev/null +++ b/wandb/run-20220729_190622-1kplw9z9/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659121582 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_190622-1kplw9z9/files/output.log b/wandb/run-20220729_190622-1kplw9z9/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..b071e9229f06da56854b13cc2606bd0a86712a3b --- /dev/null +++ b/wandb/run-20220729_190622-1kplw9z9/files/output.log @@ -0,0 +1,158 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_19-06-17_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=32, +per_device_train_batch_size=32, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, 
+save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 81.67it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 459.65it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow + 0%| | 0/256 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 870, in main + raw_datasets = make_dataset(seed=training_args.seed) + File "run_flax_speech_recognition_ctc.py", line 811, in make_dataset + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + File 
"/data/flax/lib/python3.8/site-packages/datasets/dataset_dict.py", line 865, in filter + { + File "/data/flax/lib/python3.8/site-packages/datasets/dataset_dict.py", line 866, in + k: dataset.filter( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 524, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/fingerprint.py", line 480, in wrapper + out = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2904, in filter + indices = self.map( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2387, in map + return self._map_single( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 557, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 524, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/fingerprint.py", line 480, in wrapper + out = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2775, in _map_single + batch = apply_function_on_filtered_inputs( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2655, in apply_function_on_filtered_inputs + processed_inputs = function(*fn_args, *additional_args, **fn_kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2347, in decorated + result = f(decorated_item, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4933, in get_indices_from_mask_function + example = {key: batch[key][i] for key in batch} + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4933, in + example = {key: batch[key][i] for key in batch} + File 
"/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 133, in __getitem__ + values = [ + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 134, in + decode_nested_example(self.features[key], value) if value is not None else None for value in values + File "/data/flax/lib/python3.8/site-packages/datasets/features/features.py", line 1260, in decode_nested_example + return schema.decode_example(obj, token_per_repo_id=token_per_repo_id) if obj is not None else None + File "/data/flax/lib/python3.8/site-packages/datasets/features/audio.py", line 144, in decode_example + array, sampling_rate = self._decode_mp3(file if file else path) + File "/data/flax/lib/python3.8/site-packages/datasets/features/audio.py", line 293, in _decode_mp3 + array, sampling_rate = torchaudio.load(path_or_file, format="mp3") + File "/data/flax/lib/python3.8/site-packages/torchaudio/backend/sox_io_backend.py", line 214, in load + return _fallback_load_fileobj(filepath, frame_offset, num_frames, normalize, channels_first, format) +NameError: name '_fallback_load_fileobj' is not defined \ No newline at end of file diff --git a/wandb/run-20220729_190622-1kplw9z9/files/requirements.txt b/wandb/run-20220729_190622-1kplw9z9/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..20cb6abc20ac8cace779da721986ddb6a238845d --- /dev/null +++ b/wandb/run-20220729_190622-1kplw9z9/files/requirements.txt @@ -0,0 +1,151 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 
+etils==0.6.0 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pygments==2.11.1 +pyparsing==3.0.6 +python-dateutil==2.8.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 
+threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220729_190622-1kplw9z9/files/wandb-metadata.json b/wandb/run-20220729_190622-1kplw9z9/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8d0b28461c5a702b9121f8ed0b59655e7e5df687 --- /dev/null +++ b/wandb/run-20220729_190622-1kplw9z9/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-29T19:06:25.890824", + "startedAt": "2022-07-29T19:06:22.537403", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=NbAiLab/nb-wav2vec2-1b-bokmaal", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=32", + "--per_device_eval_batch_size=32", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + 
"--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220729_190622-1kplw9z9/files/wandb-summary.json b/wandb/run-20220729_190622-1kplw9z9/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..a594e25ada167c5ad54a828e8a5b1a6223620b44 --- /dev/null +++ b/wandb/run-20220729_190622-1kplw9z9/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 8}} \ No newline at end of file diff --git a/wandb/run-20220729_190622-1kplw9z9/logs/debug-internal.log b/wandb/run-20220729_190622-1kplw9z9/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..4823c409a52b35a9998820cbbaf9675a4b616a49 --- /dev/null +++ b/wandb/run-20220729_190622-1kplw9z9/logs/debug-internal.log @@ -0,0 +1,149 @@ +2022-07-29 19:06:23,361 INFO MainThread:179745 [internal.py:wandb_internal():87] W&B internal server running at pid: 179745, started at: 2022-07-29 19:06:23.361487 +2022-07-29 19:06:23,363 INFO WriterThread:179745 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/run-1kplw9z9.wandb +2022-07-29 19:06:23,364 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: check_version +2022-07-29 19:06:23,364 DEBUG SenderThread:179745 [sender.py:send():234] send: header +2022-07-29 19:06:23,365 DEBUG SenderThread:179745 
[sender.py:send_request():248] send_request: check_version +2022-07-29 19:06:23,400 DEBUG SenderThread:179745 [sender.py:send():234] send: run +2022-07-29 19:06:23,577 INFO SenderThread:179745 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files +2022-07-29 19:06:23,577 INFO SenderThread:179745 [sender.py:_start_run_threads():804] run started: 1kplw9z9 with start time 1659121582 +2022-07-29 19:06:23,578 DEBUG SenderThread:179745 [sender.py:send():234] send: summary +2022-07-29 19:06:23,578 INFO SenderThread:179745 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 19:06:23,579 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: run_start +2022-07-29 19:06:24,581 INFO Thread-8 :179745 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/wandb-summary.json +2022-07-29 19:06:25,890 DEBUG HandlerThread:179745 [meta.py:__init__():40] meta init +2022-07-29 19:06:25,890 DEBUG HandlerThread:179745 [meta.py:__init__():54] meta init done +2022-07-29 19:06:25,890 DEBUG HandlerThread:179745 [meta.py:probe():214] probe +2022-07-29 19:06:25,892 DEBUG HandlerThread:179745 [meta.py:_setup_git():204] setup git +2022-07-29 19:06:25,927 DEBUG HandlerThread:179745 [meta.py:_setup_git():211] setup git done +2022-07-29 19:06:25,927 DEBUG HandlerThread:179745 [meta.py:_save_code():92] save code +2022-07-29 19:06:25,940 DEBUG HandlerThread:179745 [meta.py:_save_code():113] save code done +2022-07-29 19:06:25,940 DEBUG HandlerThread:179745 [meta.py:_save_patches():130] save patches +2022-07-29 19:06:25,995 DEBUG HandlerThread:179745 [meta.py:_save_patches():172] save patches done +2022-07-29 19:06:25,995 DEBUG HandlerThread:179745 [meta.py:_save_pip():58] save pip +2022-07-29 19:06:25,996 DEBUG HandlerThread:179745 [meta.py:_save_pip():72] save pip done +2022-07-29 19:06:25,996 DEBUG 
HandlerThread:179745 [meta.py:probe():252] probe done +2022-07-29 19:06:26,000 DEBUG SenderThread:179745 [sender.py:send():234] send: files +2022-07-29 19:06:26,000 INFO SenderThread:179745 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-29 19:06:26,000 INFO SenderThread:179745 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-29 19:06:26,006 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:06:26,009 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:06:26,473 INFO Thread-11 :179745 [upload_job.py:push():137] Uploaded file /tmp/tmpwan7wg8uwandb/2j2ps7it-wandb-metadata.json +2022-07-29 19:06:26,579 INFO Thread-8 :179745 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/wandb-metadata.json +2022-07-29 19:06:26,579 INFO Thread-8 :179745 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/output.log +2022-07-29 19:06:26,579 INFO Thread-8 :179745 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/requirements.txt +2022-07-29 19:06:26,579 INFO Thread-8 :179745 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/code/run_flax_speech_recognition_ctc.py +2022-07-29 19:06:26,580 INFO Thread-8 :179745 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/code +2022-07-29 19:06:26,710 INFO Thread-12 :179745 [upload_job.py:push():137] Uploaded file /tmp/tmpwan7wg8uwandb/2ncxbc2w-code/run_flax_speech_recognition_ctc.py +2022-07-29 19:06:28,580 INFO Thread-8 :179745 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/output.log +2022-07-29 19:06:30,581 INFO Thread-8 :179745 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/output.log +2022-07-29 19:06:32,541 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:06:32,541 DEBUG SenderThread:179745 [sender.py:send():234] send: telemetry +2022-07-29 19:06:32,542 DEBUG SenderThread:179745 [sender.py:send():234] send: exit +2022-07-29 19:06:32,542 INFO SenderThread:179745 [sender.py:send_exit():366] handling exit code: 1 +2022-07-29 19:06:32,542 INFO SenderThread:179745 [sender.py:send_exit():368] handling runtime: 8 +2022-07-29 19:06:32,542 INFO SenderThread:179745 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 19:06:32,542 INFO SenderThread:179745 [sender.py:send_exit():374] send defer +2022-07-29 19:06:32,543 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:06:32,543 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:06:32,543 INFO HandlerThread:179745 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-29 19:06:32,544 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: defer +2022-07-29 19:06:32,544 INFO SenderThread:179745 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-29 19:06:32,544 INFO SenderThread:179745 [sender.py:transition_state():387] send defer: 1 +2022-07-29 19:06:32,544 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:06:32,544 INFO HandlerThread:179745 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-29 19:06:32,581 INFO Thread-8 :179745 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/output.log +2022-07-29 19:06:32,582 INFO Thread-8 :179745 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/wandb-summary.json +2022-07-29 19:06:32,624 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: defer +2022-07-29 19:06:32,624 INFO SenderThread:179745 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-29 19:06:32,624 INFO SenderThread:179745 [sender.py:transition_state():387] send defer: 2 +2022-07-29 19:06:32,624 DEBUG SenderThread:179745 [sender.py:send():234] send: stats +2022-07-29 19:06:32,624 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:06:32,624 INFO HandlerThread:179745 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-29 19:06:32,625 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: defer +2022-07-29 19:06:32,625 INFO SenderThread:179745 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-29 19:06:32,625 INFO SenderThread:179745 [sender.py:transition_state():387] send defer: 3 +2022-07-29 19:06:32,625 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:06:32,625 INFO HandlerThread:179745 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-29 19:06:32,625 DEBUG SenderThread:179745 [sender.py:send():234] send: summary +2022-07-29 19:06:32,625 INFO SenderThread:179745 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 19:06:32,625 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: defer +2022-07-29 19:06:32,625 INFO SenderThread:179745 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-29 19:06:32,626 INFO SenderThread:179745 [sender.py:transition_state():387] send defer: 4 +2022-07-29 19:06:32,626 DEBUG HandlerThread:179745 
[handler.py:handle_request():130] handle_request: defer +2022-07-29 19:06:32,626 INFO HandlerThread:179745 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-29 19:06:32,626 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: defer +2022-07-29 19:06:32,626 INFO SenderThread:179745 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-29 19:06:32,645 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:06:32,789 INFO SenderThread:179745 [sender.py:transition_state():387] send defer: 5 +2022-07-29 19:06:32,789 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:06:32,790 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:06:32,790 INFO HandlerThread:179745 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-29 19:06:32,790 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: defer +2022-07-29 19:06:32,790 INFO SenderThread:179745 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-29 19:06:32,790 INFO SenderThread:179745 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-29 19:06:32,891 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:06:33,582 INFO Thread-8 :179745 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/config.yaml +2022-07-29 19:06:33,582 INFO SenderThread:179745 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/wandb-summary.json +2022-07-29 19:06:33,583 INFO SenderThread:179745 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files +2022-07-29 19:06:33,583 INFO SenderThread:179745 [dir_watcher.py:finish():327] scan save: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/config.yaml config.yaml +2022-07-29 19:06:33,583 INFO SenderThread:179745 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/requirements.txt requirements.txt +2022-07-29 19:06:33,583 INFO SenderThread:179745 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/output.log output.log +2022-07-29 19:06:33,583 INFO SenderThread:179745 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/wandb-summary.json wandb-summary.json +2022-07-29 19:06:33,584 INFO SenderThread:179745 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/wandb-metadata.json wandb-metadata.json +2022-07-29 19:06:33,586 INFO SenderThread:179745 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-29 19:06:33,586 INFO SenderThread:179745 [sender.py:transition_state():387] send defer: 6 +2022-07-29 19:06:33,587 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:06:33,592 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:06:33,593 INFO HandlerThread:179745 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-29 19:06:33,593 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: defer +2022-07-29 19:06:33,595 INFO SenderThread:179745 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-29 19:06:33,596 INFO SenderThread:179745 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 19:06:33,691 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:06:33,691 DEBUG 
SenderThread:179745 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:06:33,792 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:06:33,792 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:06:33,894 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:06:33,894 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:06:33,995 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:06:33,996 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:06:34,059 INFO Thread-15 :179745 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/output.log +2022-07-29 19:06:34,091 INFO Thread-14 :179745 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/requirements.txt +2022-07-29 19:06:34,097 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:06:34,097 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:06:34,102 INFO Thread-16 :179745 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/wandb-summary.json +2022-07-29 19:06:34,105 INFO Thread-13 :179745 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/files/config.yaml +2022-07-29 19:06:34,198 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:06:34,199 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:06:34,300 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:06:34,300 DEBUG 
SenderThread:179745 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:06:34,306 INFO Thread-7 :179745 [sender.py:transition_state():387] send defer: 7 +2022-07-29 19:06:34,306 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:06:34,306 INFO HandlerThread:179745 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-29 19:06:34,307 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: defer +2022-07-29 19:06:34,307 INFO SenderThread:179745 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-29 19:06:34,401 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:06:34,490 INFO SenderThread:179745 [sender.py:transition_state():387] send defer: 8 +2022-07-29 19:06:34,491 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:06:34,491 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:06:34,491 INFO HandlerThread:179745 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-29 19:06:34,491 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: defer +2022-07-29 19:06:34,492 INFO SenderThread:179745 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-29 19:06:34,492 INFO SenderThread:179745 [sender.py:transition_state():387] send defer: 9 +2022-07-29 19:06:34,492 DEBUG SenderThread:179745 [sender.py:send():234] send: final +2022-07-29 19:06:34,492 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:06:34,492 INFO HandlerThread:179745 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-29 19:06:34,492 DEBUG SenderThread:179745 [sender.py:send():234] send: footer +2022-07-29 19:06:34,493 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: defer +2022-07-29 19:06:34,493 INFO SenderThread:179745 
[sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-29 19:06:34,592 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:06:34,592 DEBUG SenderThread:179745 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:06:34,592 INFO SenderThread:179745 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 19:06:34,848 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: get_summary +2022-07-29 19:06:34,849 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-29 19:06:34,850 DEBUG HandlerThread:179745 [handler.py:handle_request():130] handle_request: shutdown +2022-07-29 19:06:34,850 INFO HandlerThread:179745 [handler.py:finish():731] shutting down handler +2022-07-29 19:06:35,493 INFO WriterThread:179745 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/run-1kplw9z9.wandb +2022-07-29 19:06:35,848 INFO SenderThread:179745 [sender.py:finish():1070] shutting down sender +2022-07-29 19:06:35,848 INFO SenderThread:179745 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 19:06:35,848 INFO SenderThread:179745 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 19:06:35,850 INFO MainThread:179745 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220729_190622-1kplw9z9/logs/debug.log b/wandb/run-20220729_190622-1kplw9z9/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..0474c8910175ed52486a17ec48155d7bfb62a7ec --- /dev/null +++ b/wandb/run-20220729_190622-1kplw9z9/logs/debug.log @@ -0,0 +1,139 @@ +2022-07-29 19:06:22,538 INFO MainThread:178475 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-29 19:06:22,539 INFO MainThread:178475 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-29 19:06:22,539 INFO MainThread:178475 
[wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/logs/debug.log +2022-07-29 19:06:22,539 INFO MainThread:178475 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190622-1kplw9z9/logs/debug-internal.log +2022-07-29 19:06:22,539 INFO MainThread:178475 [wandb_init.py:init():404] calling init triggers +2022-07-29 19:06:22,539 INFO MainThread:178475 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-29 19:06:22,539 INFO MainThread:178475 [wandb_init.py:init():460] starting backend +2022-07-29 19:06:22,539 INFO MainThread:178475 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-29 19:06:22,566 INFO MainThread:178475 [backend.py:ensure_launched():216] starting backend process... +2022-07-29 19:06:22,591 INFO MainThread:178475 [backend.py:ensure_launched():221] started backend process with pid: 179745 +2022-07-29 19:06:22,593 INFO MainThread:178475 [wandb_init.py:init():469] backend started and connected +2022-07-29 19:06:22,606 INFO MainThread:178475 [wandb_init.py:init():533] updated telemetry +2022-07-29 19:06:22,668 INFO MainThread:178475 [wandb_init.py:init():563] communicating current version +2022-07-29 19:06:23,398 INFO MainThread:178475 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! 
To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-29 19:06:23,398 INFO MainThread:178475 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-29 19:06:23,579 INFO MainThread:178475 [wandb_init.py:init():606] starting run threads in backend +2022-07-29 19:06:26,003 INFO MainThread:178475 [wandb_run.py:_console_start():1810] atexit reg +2022-07-29 19:06:26,004 INFO MainThread:178475 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-29 19:06:26,004 INFO MainThread:178475 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-29 19:06:26,007 INFO MainThread:178475 [wandb_run.py:_redirect():1745] Redirects installed. +2022-07-29 19:06:26,007 INFO MainThread:178475 [wandb_init.py:init():633] run started, returning control to user process +2022-07-29 19:06:30,488 INFO MainThread:178475 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-29 19:06:30,495 INFO MainThread:178475 [wandb_run.py:_restore():1752] restore +2022-07-29 19:06:32,544 INFO MainThread:178475 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 73701 +} + +2022-07-29 19:06:32,790 INFO MainThread:178475 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 73701 +} + +2022-07-29 19:06:33,590 INFO MainThread:178475 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 85497 +} + +2022-07-29 19:06:33,692 INFO MainThread:178475 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 85497 +} + +2022-07-29 19:06:33,793 INFO MainThread:178475 [wandb_run.py:_wait_for_finish():1912] got exit ret: 
file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85497 + total_bytes: 85497 +} + +2022-07-29 19:06:33,895 INFO MainThread:178475 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85497 + total_bytes: 85497 +} + +2022-07-29 19:06:33,996 INFO MainThread:178475 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85497 + total_bytes: 85497 +} + +2022-07-29 19:06:34,098 INFO MainThread:178475 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85497 + total_bytes: 85497 +} + +2022-07-29 19:06:34,199 INFO MainThread:178475 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85497 + total_bytes: 85497 +} + +2022-07-29 19:06:34,301 INFO MainThread:178475 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85497 + total_bytes: 85497 +} + +2022-07-29 19:06:34,491 INFO MainThread:178475 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85497 + total_bytes: 85497 +} + +2022-07-29 19:06:34,848 INFO MainThread:178475 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85497 + total_bytes: 85497 +} +local_info { +} + +2022-07-29 19:06:36,413 INFO MainThread:178475 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220729_190622-1kplw9z9/run-1kplw9z9.wandb b/wandb/run-20220729_190622-1kplw9z9/run-1kplw9z9.wandb new file mode 100644 index 
0000000000000000000000000000000000000000..e843c03f2638b3c7aa1f54a1fe09fc3c1e740163 --- /dev/null +++ b/wandb/run-20220729_190622-1kplw9z9/run-1kplw9z9.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bee9267448bf968d5a1cc3b1820fa16787862010fd2748e71bef4be596da455 +size 9435 diff --git a/wandb/run-20220729_190943-1pf464vg/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220729_190943-1pf464vg/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..934c1109c5f92a96e47636edcd49612a996dd9fe --- /dev/null +++ b/wandb/run-20220729_190943-1pf464vg/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1596 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + return False # 
Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map(map_npsc).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", "audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", 
"test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise ValueError( 
+ "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_190943-1pf464vg/files/config.yaml b/wandb/run-20220729_190943-1pf464vg/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f1c52576d03b2c70e0586b2a98360aea76d1ffec --- /dev/null +++ b/wandb/run-20220729_190943-1pf464vg/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659121783 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_190943-1pf464vg/files/output.log b/wandb/run-20220729_190943-1pf464vg/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..2598417c23468f7489ff8875757267000883d2e9 --- /dev/null +++ b/wandb/run-20220729_190943-1pf464vg/files/output.log @@ -0,0 +1,178 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_19-09-39_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=32, +per_device_train_batch_size=32, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, 
+save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 83.43it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 456.55it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow + + + + + + + + + + + + + + + + + 6%|███████████████▎ | 16/256 [02:54<43:31, 10.88s/ba] +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1596, in + main() + File "run_flax_speech_recognition_ctc.py", line 870, in main + raw_datasets = make_dataset(seed=training_args.seed) 
+ File "run_flax_speech_recognition_ctc.py", line 811, in make_dataset + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + File "/data/flax/lib/python3.8/site-packages/datasets/dataset_dict.py", line 865, in filter + { + File "/data/flax/lib/python3.8/site-packages/datasets/dataset_dict.py", line 866, in + k: dataset.filter( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 524, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/fingerprint.py", line 480, in wrapper + out = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2904, in filter + indices = self.map( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2387, in map + return self._map_single( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 557, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 524, in wrapper + out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/fingerprint.py", line 480, in wrapper + out = func(self, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2775, in _map_single + batch = apply_function_on_filtered_inputs( + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2655, in apply_function_on_filtered_inputs + processed_inputs = function(*fn_args, *additional_args, **fn_kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2347, in decorated + result = f(decorated_item, *args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4933, in get_indices_from_mask_function + example = {key: batch[key][i] for key in batch} + File 
"/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 4933, in + example = {key: batch[key][i] for key in batch} + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 133, in __getitem__ + values = [ + File "/data/flax/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 134, in + decode_nested_example(self.features[key], value) if value is not None else None for value in values + File "/data/flax/lib/python3.8/site-packages/datasets/features/features.py", line 1260, in decode_nested_example + return schema.decode_example(obj, token_per_repo_id=token_per_repo_id) if obj is not None else None + File "/data/flax/lib/python3.8/site-packages/datasets/features/audio.py", line 144, in decode_example + array, sampling_rate = self._decode_mp3(file if file else path) + File "/data/flax/lib/python3.8/site-packages/datasets/features/audio.py", line 293, in _decode_mp3 + array, sampling_rate = torchaudio.load(path_or_file, format="mp3") + File "/data/flax/lib/python3.8/site-packages/torchaudio/backend/sox_io_backend.py", line 214, in load + return _fallback_load_fileobj(filepath, frame_offset, num_frames, normalize, channels_first, format) + File "/data/flax/lib/python3.8/site-packages/torchaudio/io/_compat.py", line 110, in load_audio_fileobj + return _load_audio(s, frame_offset, num_frames, convert, channels_first) + File "/data/flax/lib/python3.8/site-packages/torchaudio/io/_compat.py", line 80, in _load_audio + waveform = s.pop_chunks()[0] +KeyboardInterrupt \ No newline at end of file diff --git a/wandb/run-20220729_190943-1pf464vg/files/requirements.txt b/wandb/run-20220729_190943-1pf464vg/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..20cb6abc20ac8cace779da721986ddb6a238845d --- /dev/null +++ b/wandb/run-20220729_190943-1pf464vg/files/requirements.txt @@ -0,0 +1,151 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 
+attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pygments==2.11.1 +pyparsing==3.0.6 +python-dateutil==2.8.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 
+setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220729_190943-1pf464vg/files/wandb-metadata.json b/wandb/run-20220729_190943-1pf464vg/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..2189214625406d37ba7ca06fbaaf8bed8a8f1f9f --- /dev/null +++ b/wandb/run-20220729_190943-1pf464vg/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-29T19:09:46.579137", + "startedAt": "2022-07-29T19:09:43.337367", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=NbAiLab/nb-wav2vec2-1b-bokmaal", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=32", + "--per_device_eval_batch_size=32", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + 
"--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220729_190943-1pf464vg/files/wandb-summary.json b/wandb/run-20220729_190943-1pf464vg/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..4cab2d787f922fafd371e8f23b9d4df2f6d91744 --- /dev/null +++ b/wandb/run-20220729_190943-1pf464vg/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 183}} \ No newline at end of file diff --git a/wandb/run-20220729_190943-1pf464vg/logs/debug-internal.log b/wandb/run-20220729_190943-1pf464vg/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..3f4244c7b5c2d5f50398d527d5cc08482ee774a7 --- /dev/null +++ b/wandb/run-20220729_190943-1pf464vg/logs/debug-internal.log @@ -0,0 +1,195 @@ +2022-07-29 19:09:44,154 INFO MainThread:186780 [internal.py:wandb_internal():87] W&B internal server running at pid: 186780, started at: 2022-07-29 19:09:44.154398 +2022-07-29 
19:09:44,156 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: check_version +2022-07-29 19:09:44,156 INFO WriterThread:186780 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/run-1pf464vg.wandb +2022-07-29 19:09:44,157 DEBUG SenderThread:186780 [sender.py:send():234] send: header +2022-07-29 19:09:44,157 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: check_version +2022-07-29 19:09:44,192 DEBUG SenderThread:186780 [sender.py:send():234] send: run +2022-07-29 19:09:44,367 INFO SenderThread:186780 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files +2022-07-29 19:09:44,367 INFO SenderThread:186780 [sender.py:_start_run_threads():804] run started: 1pf464vg with start time 1659121783 +2022-07-29 19:09:44,368 DEBUG SenderThread:186780 [sender.py:send():234] send: summary +2022-07-29 19:09:44,368 INFO SenderThread:186780 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 19:09:44,369 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: run_start +2022-07-29 19:09:45,368 INFO Thread-8 :186780 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/wandb-summary.json +2022-07-29 19:09:46,578 DEBUG HandlerThread:186780 [meta.py:__init__():40] meta init +2022-07-29 19:09:46,579 DEBUG HandlerThread:186780 [meta.py:__init__():54] meta init done +2022-07-29 19:09:46,579 DEBUG HandlerThread:186780 [meta.py:probe():214] probe +2022-07-29 19:09:46,580 DEBUG HandlerThread:186780 [meta.py:_setup_git():204] setup git +2022-07-29 19:09:46,609 DEBUG HandlerThread:186780 [meta.py:_setup_git():211] setup git done +2022-07-29 19:09:46,609 DEBUG HandlerThread:186780 [meta.py:_save_code():92] save code +2022-07-29 19:09:46,620 DEBUG HandlerThread:186780 [meta.py:_save_code():113] save code 
done +2022-07-29 19:09:46,620 DEBUG HandlerThread:186780 [meta.py:_save_patches():130] save patches +2022-07-29 19:09:46,675 DEBUG HandlerThread:186780 [meta.py:_save_patches():172] save patches done +2022-07-29 19:09:46,675 DEBUG HandlerThread:186780 [meta.py:_save_pip():58] save pip +2022-07-29 19:09:46,676 DEBUG HandlerThread:186780 [meta.py:_save_pip():72] save pip done +2022-07-29 19:09:46,676 DEBUG HandlerThread:186780 [meta.py:probe():252] probe done +2022-07-29 19:09:46,679 DEBUG SenderThread:186780 [sender.py:send():234] send: files +2022-07-29 19:09:46,679 INFO SenderThread:186780 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-29 19:09:46,679 INFO SenderThread:186780 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-29 19:09:46,683 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:09:46,684 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:09:47,349 INFO Thread-11 :186780 [upload_job.py:push():137] Uploaded file /tmp/tmp5vmmygpewandb/1fuj1l6o-wandb-metadata.json +2022-07-29 19:09:47,355 INFO Thread-12 :186780 [upload_job.py:push():137] Uploaded file /tmp/tmp5vmmygpewandb/2x75esdp-code/run_flax_speech_recognition_ctc.py +2022-07-29 19:09:47,374 INFO Thread-8 :186780 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/requirements.txt +2022-07-29 19:09:47,375 INFO Thread-8 :186780 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/wandb-metadata.json +2022-07-29 19:09:47,375 INFO Thread-8 :186780 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/code/run_flax_speech_recognition_ctc.py +2022-07-29 19:09:47,375 INFO Thread-8 :186780 
[dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:09:47,375 INFO Thread-8 :186780 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/code +2022-07-29 19:09:49,375 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:09:51,376 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:09:53,376 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:10:01,850 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:10:01,851 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:10:03,380 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:10:14,648 DEBUG SenderThread:186780 [sender.py:send():234] send: stats +2022-07-29 19:10:15,385 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:10:17,032 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:10:17,032 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:10:25,388 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:10:32,192 DEBUG 
HandlerThread:186780 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:10:32,192 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:10:35,392 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:10:44,706 DEBUG SenderThread:186780 [sender.py:send():234] send: stats +2022-07-29 19:10:45,395 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:10:47,351 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:10:47,351 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:10:57,399 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:11:02,519 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:11:02,519 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:11:07,403 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:11:14,764 DEBUG SenderThread:186780 [sender.py:send():234] send: stats +2022-07-29 19:11:17,661 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:11:17,661 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:11:19,407 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:11:29,411 INFO Thread-8 
:186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:11:32,824 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:11:32,824 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:11:39,415 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:11:44,822 DEBUG SenderThread:186780 [sender.py:send():234] send: stats +2022-07-29 19:11:47,986 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:11:47,987 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:11:51,419 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:12:01,423 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:12:03,139 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:12:03,139 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:12:11,426 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:12:14,885 DEBUG SenderThread:186780 [sender.py:send():234] send: stats +2022-07-29 19:12:18,295 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:12:18,295 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:12:23,430 INFO Thread-8 :186780 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:12:33,434 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:12:33,455 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:12:33,455 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:12:43,438 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:12:44,945 DEBUG SenderThread:186780 [sender.py:send():234] send: stats +2022-07-29 19:12:45,034 WARNING MainThread:186780 [internal.py:wandb_internal():146] Internal process interrupt: 1 +2022-07-29 19:12:47,439 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:12:47,542 DEBUG SenderThread:186780 [sender.py:send():234] send: telemetry +2022-07-29 19:12:47,542 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:12:47,542 DEBUG SenderThread:186780 [sender.py:send():234] send: exit +2022-07-29 19:12:47,543 INFO SenderThread:186780 [sender.py:send_exit():366] handling exit code: 255 +2022-07-29 19:12:47,543 INFO SenderThread:186780 [sender.py:send_exit():368] handling runtime: 183 +2022-07-29 19:12:47,543 INFO SenderThread:186780 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 19:12:47,544 INFO SenderThread:186780 [sender.py:send_exit():374] send defer +2022-07-29 19:12:47,544 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:12:47,544 DEBUG HandlerThread:186780 
[handler.py:handle_request():130] handle_request: defer +2022-07-29 19:12:47,544 INFO HandlerThread:186780 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-29 19:12:47,544 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: defer +2022-07-29 19:12:47,544 INFO SenderThread:186780 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-29 19:12:47,544 INFO SenderThread:186780 [sender.py:transition_state():387] send defer: 1 +2022-07-29 19:12:47,545 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:12:47,545 INFO HandlerThread:186780 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-29 19:12:47,552 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: defer +2022-07-29 19:12:47,552 INFO SenderThread:186780 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-29 19:12:47,552 INFO SenderThread:186780 [sender.py:transition_state():387] send defer: 2 +2022-07-29 19:12:47,552 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:12:47,552 INFO HandlerThread:186780 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-29 19:12:47,552 DEBUG SenderThread:186780 [sender.py:send():234] send: stats +2022-07-29 19:12:47,553 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: defer +2022-07-29 19:12:47,553 INFO SenderThread:186780 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-29 19:12:47,553 INFO SenderThread:186780 [sender.py:transition_state():387] send defer: 3 +2022-07-29 19:12:47,553 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:12:47,553 INFO HandlerThread:186780 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-29 19:12:47,553 DEBUG SenderThread:186780 [sender.py:send():234] send: summary +2022-07-29 19:12:47,574 INFO SenderThread:186780 [sender.py:_save_file():939] 
saving file wandb-summary.json with policy end +2022-07-29 19:12:47,575 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: defer +2022-07-29 19:12:47,575 INFO SenderThread:186780 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-29 19:12:47,575 INFO SenderThread:186780 [sender.py:transition_state():387] send defer: 4 +2022-07-29 19:12:47,575 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:12:47,575 INFO HandlerThread:186780 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-29 19:12:47,575 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: defer +2022-07-29 19:12:47,575 INFO SenderThread:186780 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-29 19:12:47,646 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:12:47,755 INFO SenderThread:186780 [sender.py:transition_state():387] send defer: 5 +2022-07-29 19:12:47,755 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:12:47,756 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:12:47,756 INFO HandlerThread:186780 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-29 19:12:47,756 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: defer +2022-07-29 19:12:47,756 INFO SenderThread:186780 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-29 19:12:47,756 INFO SenderThread:186780 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-29 19:12:47,857 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:12:48,440 INFO Thread-8 :186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/wandb-summary.json +2022-07-29 19:12:48,440 INFO SenderThread:186780 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/config.yaml +2022-07-29 19:12:48,440 INFO SenderThread:186780 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:12:48,440 INFO SenderThread:186780 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files +2022-07-29 19:12:48,441 INFO SenderThread:186780 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/config.yaml config.yaml +2022-07-29 19:12:48,441 INFO SenderThread:186780 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/requirements.txt requirements.txt +2022-07-29 19:12:48,441 INFO SenderThread:186780 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log output.log +2022-07-29 19:12:48,441 INFO SenderThread:186780 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/wandb-summary.json wandb-summary.json +2022-07-29 19:12:48,444 INFO SenderThread:186780 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/wandb-metadata.json wandb-metadata.json +2022-07-29 19:12:48,446 INFO SenderThread:186780 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-29 19:12:48,446 INFO SenderThread:186780 [sender.py:transition_state():387] send defer: 6 +2022-07-29 19:12:48,446 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:12:48,449 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: defer +2022-07-29 
19:12:48,449 INFO HandlerThread:186780 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-29 19:12:48,450 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: defer +2022-07-29 19:12:48,450 INFO SenderThread:186780 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-29 19:12:48,450 INFO SenderThread:186780 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 19:12:48,548 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:12:48,548 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:12:48,650 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:12:48,650 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:12:48,751 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:12:48,751 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:12:48,852 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:12:48,852 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:12:48,921 INFO Thread-14 :186780 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/requirements.txt +2022-07-29 19:12:48,923 INFO Thread-13 :186780 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/config.yaml +2022-07-29 19:12:48,940 INFO Thread-15 :186780 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/output.log +2022-07-29 19:12:48,954 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:12:48,954 DEBUG SenderThread:186780 [sender.py:send_request():248] 
send_request: poll_exit +2022-07-29 19:12:48,958 INFO Thread-16 :186780 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/files/wandb-summary.json +2022-07-29 19:12:49,056 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:12:49,056 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:12:49,157 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:12:49,157 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:12:49,158 INFO Thread-7 :186780 [sender.py:transition_state():387] send defer: 7 +2022-07-29 19:12:49,159 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:12:49,159 INFO HandlerThread:186780 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-29 19:12:49,159 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: defer +2022-07-29 19:12:49,159 INFO SenderThread:186780 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-29 19:12:49,258 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:12:49,632 INFO SenderThread:186780 [sender.py:transition_state():387] send defer: 8 +2022-07-29 19:12:49,632 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:12:49,632 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:12:49,633 INFO HandlerThread:186780 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-29 19:12:49,633 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: defer +2022-07-29 19:12:49,633 INFO SenderThread:186780 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-29 19:12:49,633 INFO SenderThread:186780 [sender.py:transition_state():387] send defer: 
9 +2022-07-29 19:12:49,633 DEBUG SenderThread:186780 [sender.py:send():234] send: final +2022-07-29 19:12:49,633 DEBUG SenderThread:186780 [sender.py:send():234] send: footer +2022-07-29 19:12:49,633 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: defer +2022-07-29 19:12:49,633 INFO HandlerThread:186780 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-29 19:12:49,634 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: defer +2022-07-29 19:12:49,634 INFO SenderThread:186780 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-29 19:12:49,733 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 19:12:49,734 DEBUG SenderThread:186780 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 19:12:49,734 INFO SenderThread:186780 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 19:12:49,995 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: get_summary +2022-07-29 19:12:49,995 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-29 19:12:49,996 DEBUG HandlerThread:186780 [handler.py:handle_request():130] handle_request: shutdown +2022-07-29 19:12:49,996 INFO HandlerThread:186780 [handler.py:finish():731] shutting down handler +2022-07-29 19:12:50,633 INFO WriterThread:186780 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/run-1pf464vg.wandb +2022-07-29 19:12:50,994 INFO SenderThread:186780 [sender.py:finish():1070] shutting down sender +2022-07-29 19:12:50,994 INFO SenderThread:186780 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 19:12:50,994 INFO SenderThread:186780 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 19:12:50,996 INFO MainThread:186780 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220729_190943-1pf464vg/logs/debug.log 
b/wandb/run-20220729_190943-1pf464vg/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..0e7ab297a504c9471649ea54a9b52ee002aa91d8 --- /dev/null +++ b/wandb/run-20220729_190943-1pf464vg/logs/debug.log @@ -0,0 +1,139 @@ +2022-07-29 19:09:43,338 INFO MainThread:185526 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-29 19:09:43,338 INFO MainThread:185526 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-29 19:09:43,339 INFO MainThread:185526 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/logs/debug.log +2022-07-29 19:09:43,339 INFO MainThread:185526 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_190943-1pf464vg/logs/debug-internal.log +2022-07-29 19:09:43,339 INFO MainThread:185526 [wandb_init.py:init():404] calling init triggers +2022-07-29 19:09:43,339 INFO MainThread:185526 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-29 19:09:43,339 INFO MainThread:185526 [wandb_init.py:init():460] starting backend +2022-07-29 19:09:43,339 INFO MainThread:185526 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-29 19:09:43,366 INFO MainThread:185526 [backend.py:ensure_launched():216] starting backend process... 
+2022-07-29 19:09:43,391 INFO MainThread:185526 [backend.py:ensure_launched():221] started backend process with pid: 186780 +2022-07-29 19:09:43,394 INFO MainThread:185526 [wandb_init.py:init():469] backend started and connected +2022-07-29 19:09:43,407 INFO MainThread:185526 [wandb_init.py:init():533] updated telemetry +2022-07-29 19:09:43,471 INFO MainThread:185526 [wandb_init.py:init():563] communicating current version +2022-07-29 19:09:44,191 INFO MainThread:185526 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-29 19:09:44,191 INFO MainThread:185526 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-29 19:09:44,368 INFO MainThread:185526 [wandb_init.py:init():606] starting run threads in backend +2022-07-29 19:09:46,683 INFO MainThread:185526 [wandb_run.py:_console_start():1810] atexit reg +2022-07-29 19:09:46,683 INFO MainThread:185526 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-29 19:09:46,684 INFO MainThread:185526 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-29 19:09:46,686 INFO MainThread:185526 [wandb_run.py:_redirect():1745] Redirects installed. 
+2022-07-29 19:09:46,686 INFO MainThread:185526 [wandb_init.py:init():633] run started, returning control to user process +2022-07-29 19:12:45,038 INFO MainThread:185526 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 255 +2022-07-29 19:12:45,045 INFO MainThread:185526 [wandb_run.py:_restore():1752] restore +2022-07-29 19:12:47,544 INFO MainThread:185526 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 73701 +} + +2022-07-29 19:12:47,756 INFO MainThread:185526 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 73701 +} + +2022-07-29 19:12:48,447 INFO MainThread:185526 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 85823 +} + +2022-07-29 19:12:48,549 INFO MainThread:185526 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73701 + total_bytes: 85823 +} + +2022-07-29 19:12:48,650 INFO MainThread:185526 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85823 + total_bytes: 85823 +} + +2022-07-29 19:12:48,752 INFO MainThread:185526 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85823 + total_bytes: 85823 +} + +2022-07-29 19:12:48,853 INFO MainThread:185526 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85823 + total_bytes: 85823 +} + +2022-07-29 19:12:48,955 INFO MainThread:185526 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} 
+pusher_stats { + uploaded_bytes: 85823 + total_bytes: 85823 +} + +2022-07-29 19:12:49,056 INFO MainThread:185526 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85823 + total_bytes: 85823 +} + +2022-07-29 19:12:49,158 INFO MainThread:185526 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85823 + total_bytes: 85823 +} + +2022-07-29 19:12:49,633 INFO MainThread:185526 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85823 + total_bytes: 85823 +} + +2022-07-29 19:12:49,994 INFO MainThread:185526 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 85823 + total_bytes: 85823 +} +local_info { +} + +2022-07-29 19:12:51,471 INFO MainThread:185526 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220729_190943-1pf464vg/run-1pf464vg.wandb b/wandb/run-20220729_190943-1pf464vg/run-1pf464vg.wandb new file mode 100644 index 0000000000000000000000000000000000000000..ed25aa0b67243a6cecd2022e2d311e2cad49a440 --- /dev/null +++ b/wandb/run-20220729_190943-1pf464vg/run-1pf464vg.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:745d88cafd1997dcb33256411787bcb772b7f8a0771002838f831409a6c2f091 +size 16769 diff --git a/wandb/run-20220729_191324-ovnz8vs0/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220729_191324-ovnz8vs0/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..934c1109c5f92a96e47636edcd49612a996dd9fe --- /dev/null +++ b/wandb/run-20220729_191324-ovnz8vs0/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1596 @@ +#!/usr/bin/env python +# coding=utf-8 +# 
Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. + +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is 
not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. + """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." 
+ }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." + }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. 
Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. + """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. 
Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. 
" + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. 
" + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. " + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + return False # 
Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map(map_npsc).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", "audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", 
"test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise ValueError( 
+ "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_191324-ovnz8vs0/files/config.yaml b/wandb/run-20220729_191324-ovnz8vs0/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac607fa6c98392ba5fb1cd10e705d04b0553eca0 --- /dev/null +++ b/wandb/run-20220729_191324-ovnz8vs0/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659122004 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_191324-ovnz8vs0/files/output.log b/wandb/run-20220729_191324-ovnz8vs0/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..257138140629bb147ad6b25f5629f8ed53b1cb2b --- /dev/null +++ b/wandb/run-20220729_191324-ovnz8vs0/files/output.log @@ -0,0 +1,804 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_19-13-20_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=128, +per_device_train_batch_size=128, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, 
+save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 78.60it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 469.14it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 256/256 [46:10<00:00, 10.82s/ba] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [13:50<00:00, 10.92s/ba] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 35/35 [06:10<00:00, 10.57s/ba] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 253741/253741 [01:39<00:00, 2560.29ex/s] + + + + + + + + + + + + + + +100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 75617/75617 [00:28<00:00, 2669.08ex/s] + + + + + + 
+100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33807/33807 [00:12<00:00, 2611.45ex/s] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 52/52 [10:01<00:00, 11.57s/ba] + + + + + + +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [01:19<00:00, 11.34s/ba] + + + + + + +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [01:13<00:00, 10.51s/ba] + + + + + + +100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 51034/51034 [00:11<00:00, 4396.10ex/s] +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6839/6839 [00:01<00:00, 4568.23ex/s] + 
+100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6323/6323 [00:01<00:00, 4447.61ex/s] +https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json not found in cache or force_download set to True, downloading to /home/javierr/.cache/huggingface/transformers/tmpx3at8so3 +Downloading config.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.53k/1.53k [00:00<00:00, 989kB/s] +storing https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json in cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +creating metadata file for /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + "contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + 
"conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json not found in cache or force_download set to True, downloading to /home/javierr/.cache/huggingface/transformers/tmp9q1m_ych +Downloading preprocessor_config.json: 
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 212/212 [00:00<00:00, 163kB/s] +storing https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json in cache at /home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +creating metadata file for /home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at /home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /home/javierr/.cache/huggingface/transformers/tmp9ya5l4jm +Downloading tokenizer_config.json: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 306/306 [00:00<00:00, 226kB/s] +storing https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/tokenizer_config.json in cache at 
/home/javierr/.cache/huggingface/transformers/32ac0765a83a8c538f4d3d8f104f69eab9648cb4d3e3a6bb6463185512f204d4.ff272c14a3ca512c9d9f85a898e6a086e8f790b06ff540f484000dab53132349 +creating metadata file for /home/javierr/.cache/huggingface/transformers/32ac0765a83a8c538f4d3d8f104f69eab9648cb4d3e3a6bb6463185512f204d4.ff272c14a3ca512c9d9f85a898e6a086e8f790b06ff540f484000dab53132349 +https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/vocab.json not found in cache or force_download set to True, downloading to /home/javierr/.cache/huggingface/transformers/tmp7d5al8e3 +Downloading vocab.json: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 289/289 [00:00<00:00, 173kB/s] +storing https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/vocab.json in cache at /home/javierr/.cache/huggingface/transformers/dbb47708b30c45561aee53d4cc808c3ed6c4785df1e2b757c8852837f6f58537.cd4a163fb068cd596a09687d4b1d7e345279d1497fdf547b3c986f4d6ee855b8 +creating metadata file for /home/javierr/.cache/huggingface/transformers/dbb47708b30c45561aee53d4cc808c3ed6c4785df1e2b757c8852837f6f58537.cd4a163fb068cd596a09687d4b1d7e345279d1497fdf547b3c986f4d6ee855b8 +https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/added_tokens.json not found in cache or force_download set to True, downloading to /home/javierr/.cache/huggingface/transformers/tmpqbnp6r29 +Downloading added_tokens.json: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 23.0/23.0 [00:00<00:00, 19.3kB/s] +storing https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/added_tokens.json in cache at 
/home/javierr/.cache/huggingface/transformers/a86e893e88bb40472d24ffc4e2be032578bb14755e88bb1e961bad6276161a17.23a5f4e269f91a88f81d8cb3c6f881c8eb30ad98b9c9691569d4cfb87512d722 +creating metadata file for /home/javierr/.cache/huggingface/transformers/a86e893e88bb40472d24ffc4e2be032578bb14755e88bb1e961bad6276161a17.23a5f4e269f91a88f81d8cb3c6f881c8eb30ad98b9c9691569d4cfb87512d722 +https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/special_tokens_map.json not found in cache or force_download set to True, downloading to /home/javierr/.cache/huggingface/transformers/tmpjnl6q31b +Downloading special_tokens_map.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.06k/1.06k [00:00<00:00, 689kB/s] +storing https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/special_tokens_map.json in cache at /home/javierr/.cache/huggingface/transformers/c0891e9bf7d08f1e6d4c0d7da3104155480b7f65b84d2e1ef0506f017fd60443.2fce57d7d6ae62285f9966753bbdba194f7ff904d5f380e3bb6a6242819f9a93 +creating metadata file for /home/javierr/.cache/huggingface/transformers/c0891e9bf7d08f1e6d4c0d7da3104155480b7f65b84d2e1ef0506f017fd60443.2fce57d7d6ae62285f9966753bbdba194f7ff904d5f380e3bb6a6242819f9a93 +loading file https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/vocab.json from cache at /home/javierr/.cache/huggingface/transformers/dbb47708b30c45561aee53d4cc808c3ed6c4785df1e2b757c8852837f6f58537.cd4a163fb068cd596a09687d4b1d7e345279d1497fdf547b3c986f4d6ee855b8 +loading file https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/tokenizer_config.json from cache at /home/javierr/.cache/huggingface/transformers/32ac0765a83a8c538f4d3d8f104f69eab9648cb4d3e3a6bb6463185512f204d4.ff272c14a3ca512c9d9f85a898e6a086e8f790b06ff540f484000dab53132349 +loading file 
https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/added_tokens.json from cache at /home/javierr/.cache/huggingface/transformers/a86e893e88bb40472d24ffc4e2be032578bb14755e88bb1e961bad6276161a17.23a5f4e269f91a88f81d8cb3c6f881c8eb30ad98b9c9691569d4cfb87512d722 +loading file https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/special_tokens_map.json from cache at /home/javierr/.cache/huggingface/transformers/c0891e9bf7d08f1e6d4c0d7da3104155480b7f65b84d2e1ef0506f017fd60443.2fce57d7d6ae62285f9966753bbdba194f7ff904d5f380e3bb6a6242819f9a93 +Adding to the vocabulary +Adding to the vocabulary +Traceback (most recent call last): + File "/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py", line 674, in from_pretrained + resolved_archive_file = cached_path( + File "/data/flax/lib/python3.8/site-packages/transformers/utils/hub.py", line 284, in cached_path + output_path = get_from_cache( + File "/data/flax/lib/python3.8/site-packages/transformers/utils/hub.py", line 502, in get_from_cache + _raise_for_status(r) + File "/data/flax/lib/python3.8/site-packages/transformers/utils/hub.py", line 411, in _raise_for_status + raise EntryNotFoundError(f"404 Client Error: Entry Not Found for url: {response.url}") +transformers.utils.hub.EntryNotFoundError: 404 Client Error: Entry Not Found for url: https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/flax_model.msgpack +During handling of the above exception, another exception occurred: +Traceback (most recent call last): + File "/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py", line 707, in from_pretrained + resolved_archive_file = cached_path( + File "/data/flax/lib/python3.8/site-packages/transformers/utils/hub.py", line 284, in cached_path + output_path = get_from_cache( + File "/data/flax/lib/python3.8/site-packages/transformers/utils/hub.py", line 502, in get_from_cache + _raise_for_status(r) + File 
"/data/flax/lib/python3.8/site-packages/transformers/utils/hub.py", line 411, in _raise_for_status + raise EntryNotFoundError(f"404 Client Error: Entry Not Found for url: {response.url}") +transformers.utils.hub.EntryNotFoundError: 404 Client Error: Entry Not Found for url: https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/flax_model.msgpack.index.json +During handling of the above exception, another exception occurred: +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1596, in + main() + File "run_flax_speech_recognition_ctc.py", line 988, in main + model = FlaxWav2Vec2ForCTC.from_pretrained( + File "/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py", line 721, in from_pretrained + raise EnvironmentError( +OSError: facebook/wav2vec2-xls-r-1b does not appear to have a file named flax_model.msgpack but there is a file for PyTorch weights. Use `from_pt=True` to load this model from those weights. \ No newline at end of file diff --git a/wandb/run-20220729_191324-ovnz8vs0/files/requirements.txt b/wandb/run-20220729_191324-ovnz8vs0/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..20cb6abc20ac8cace779da721986ddb6a238845d --- /dev/null +++ b/wandb/run-20220729_191324-ovnz8vs0/files/requirements.txt @@ -0,0 +1,151 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 
+gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pygments==2.11.1 +pyparsing==3.0.6 +python-dateutil==2.8.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 
+transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220729_191324-ovnz8vs0/files/wandb-metadata.json b/wandb/run-20220729_191324-ovnz8vs0/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0657ff0d4f40d89b7101e7493aea4e29abc549 --- /dev/null +++ b/wandb/run-20220729_191324-ovnz8vs0/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-29T19:13:28.123640", + "startedAt": "2022-07-29T19:13:24.831402", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=NbAiLab/nb-wav2vec2-1b-bokmaal", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=128", + "--per_device_eval_batch_size=128", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + 
"--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220729_191324-ovnz8vs0/files/wandb-summary.json b/wandb/run-20220729_191324-ovnz8vs0/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..0d78a12cd0bb70c6bb4e30b9459aab8fa516bba0 --- /dev/null +++ b/wandb/run-20220729_191324-ovnz8vs0/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 4912}} \ No newline at end of file diff --git a/wandb/run-20220729_191324-ovnz8vs0/logs/debug-internal.log b/wandb/run-20220729_191324-ovnz8vs0/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..7e9f3cacc1c921e78fdbcfcf9a15781ecf597023 --- /dev/null +++ b/wandb/run-20220729_191324-ovnz8vs0/logs/debug-internal.log @@ -0,0 +1,1476 @@ +2022-07-29 19:13:25,646 INFO MainThread:1749416 [internal.py:wandb_internal():87] W&B internal server running at pid: 1749416, started at: 2022-07-29 19:13:25.646038 +2022-07-29 19:13:25,648 INFO WriterThread:1749416 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/run-ovnz8vs0.wandb +2022-07-29 19:13:25,648 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: check_version +2022-07-29 19:13:25,649 DEBUG SenderThread:1749416 [sender.py:send():234] send: header +2022-07-29 19:13:25,649 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: check_version +2022-07-29 19:13:25,688 DEBUG SenderThread:1749416 
[sender.py:send():234] send: run +2022-07-29 19:13:25,857 INFO SenderThread:1749416 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files +2022-07-29 19:13:25,857 INFO SenderThread:1749416 [sender.py:_start_run_threads():804] run started: ovnz8vs0 with start time 1659122004 +2022-07-29 19:13:25,859 DEBUG SenderThread:1749416 [sender.py:send():234] send: summary +2022-07-29 19:13:25,859 INFO SenderThread:1749416 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 19:13:25,860 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: run_start +2022-07-29 19:13:26,862 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/wandb-summary.json +2022-07-29 19:13:28,123 DEBUG HandlerThread:1749416 [meta.py:__init__():40] meta init +2022-07-29 19:13:28,123 DEBUG HandlerThread:1749416 [meta.py:__init__():54] meta init done +2022-07-29 19:13:28,123 DEBUG HandlerThread:1749416 [meta.py:probe():214] probe +2022-07-29 19:13:28,124 DEBUG HandlerThread:1749416 [meta.py:_setup_git():204] setup git +2022-07-29 19:13:28,155 DEBUG HandlerThread:1749416 [meta.py:_setup_git():211] setup git done +2022-07-29 19:13:28,155 DEBUG HandlerThread:1749416 [meta.py:_save_code():92] save code +2022-07-29 19:13:28,167 DEBUG HandlerThread:1749416 [meta.py:_save_code():113] save code done +2022-07-29 19:13:28,167 DEBUG HandlerThread:1749416 [meta.py:_save_patches():130] save patches +2022-07-29 19:13:28,222 DEBUG HandlerThread:1749416 [meta.py:_save_patches():172] save patches done +2022-07-29 19:13:28,222 DEBUG HandlerThread:1749416 [meta.py:_save_pip():58] save pip +2022-07-29 19:13:28,223 DEBUG HandlerThread:1749416 [meta.py:_save_pip():72] save pip done +2022-07-29 19:13:28,223 DEBUG HandlerThread:1749416 [meta.py:probe():252] probe done +2022-07-29 19:13:28,226 DEBUG 
SenderThread:1749416 [sender.py:send():234] send: files +2022-07-29 19:13:28,226 INFO SenderThread:1749416 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-29 19:13:28,227 INFO SenderThread:1749416 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-29 19:13:28,233 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:13:28,233 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:13:28,788 INFO Thread-11 :1749416 [upload_job.py:push():137] Uploaded file /tmp/tmpp4joxsprwandb/1xi0foyf-wandb-metadata.json +2022-07-29 19:13:28,865 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/wandb-metadata.json +2022-07-29 19:13:28,865 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/requirements.txt +2022-07-29 19:13:28,865 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:13:28,865 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/code/run_flax_speech_recognition_ctc.py +2022-07-29 19:13:28,865 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/code +2022-07-29 19:13:28,904 INFO Thread-12 :1749416 [upload_job.py:push():137] Uploaded file /tmp/tmpp4joxsprwandb/1mmr2akn-code/run_flax_speech_recognition_ctc.py +2022-07-29 19:13:30,865 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:13:32,866 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:13:34,867 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:13:43,384 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:13:43,385 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:13:44,872 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:13:56,197 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:13:56,877 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:13:58,544 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:13:58,544 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:14:06,882 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:14:13,688 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:14:13,688 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:14:18,887 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:14:26,257 DEBUG 
SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:14:28,844 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:14:28,844 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:14:28,892 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:14:38,896 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:14:43,988 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:14:43,988 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:14:50,901 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:14:56,318 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:14:59,129 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:14:59,130 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:15:00,905 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:15:12,910 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:15:14,595 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:15:14,596 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:15:22,914 
INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:15:26,384 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:15:29,753 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:15:29,754 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:15:34,919 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:15:44,900 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:15:44,900 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:15:44,923 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:15:54,927 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:15:56,447 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:16:00,060 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:16:00,060 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:16:07,932 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:16:15,210 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:16:15,211 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 
19:16:17,936 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:16:26,512 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:16:27,940 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:16:30,360 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:16:30,361 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:16:39,945 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:16:45,516 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:16:45,517 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:16:49,949 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:16:56,576 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:17:00,669 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:17:00,670 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:17:01,954 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:17:11,958 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:17:15,812 DEBUG 
HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:17:15,812 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:17:21,962 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:17:26,642 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:17:30,960 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:17:30,961 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:17:33,967 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:17:43,971 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:17:46,746 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:17:46,747 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:17:55,976 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:17:56,712 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:18:01,895 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:18:01,895 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:18:05,981 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:18:15,985 
INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:18:17,057 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:18:17,057 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:18:26,783 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:18:27,990 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:18:32,212 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:18:32,212 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:18:39,996 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:18:47,360 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:18:47,360 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:18:50,001 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:18:56,856 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:19:00,005 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:19:02,508 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:19:02,508 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 
19:19:12,011 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:19:17,659 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:19:17,660 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:19:22,015 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:19:26,927 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:19:32,806 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:19:32,806 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:19:34,020 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:19:44,024 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:19:47,956 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:19:47,957 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:19:54,028 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:19:56,999 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:20:03,105 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:20:03,105 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status 
+2022-07-29 19:20:06,033 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:20:16,038 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:20:18,284 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:20:18,284 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:20:26,042 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:20:27,071 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:20:33,437 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:20:33,437 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:20:39,049 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:20:48,629 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:20:48,630 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:20:49,053 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:20:57,143 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:20:59,058 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:21:03,899 DEBUG 
HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:21:03,900 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:21:11,063 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:21:19,089 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:21:19,090 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:21:21,068 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:21:27,214 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:21:31,073 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:21:34,244 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:21:34,244 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:21:43,079 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:21:49,397 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:21:49,398 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:21:53,083 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:21:57,286 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:22:03,088 
INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:22:04,549 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:22:04,549 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:22:15,094 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:22:19,717 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:22:19,718 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:22:25,099 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:22:27,355 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:22:34,892 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:22:34,893 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:22:35,104 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:22:47,110 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:22:50,043 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:22:50,043 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:22:57,114 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:22:57,414 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:23:05,200 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:23:05,201 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:23:07,119 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:23:19,125 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:23:20,359 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:23:20,359 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:23:27,473 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:23:29,129 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:23:35,517 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:23:35,517 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:23:41,135 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:23:50,672 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:23:50,672 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:23:51,139 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:23:57,535 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:24:03,144 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:24:05,857 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:24:05,858 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:24:13,148 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:24:21,017 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:24:21,018 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:24:23,153 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:24:27,594 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:24:35,158 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:24:36,179 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:24:36,180 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:24:45,163 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:24:51,368 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status 
+2022-07-29 19:24:51,368 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:24:57,168 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:24:57,654 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:25:06,514 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:25:06,514 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:25:07,172 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:25:20,178 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:25:21,675 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:25:21,676 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:25:27,714 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:25:30,182 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:25:36,834 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:25:36,834 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:25:40,187 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:25:51,996 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-29 19:25:51,996 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:25:52,192 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:25:57,774 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:26:02,197 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:26:07,151 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:26:07,152 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:26:12,201 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:26:22,416 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:26:22,417 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:26:24,206 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:26:27,833 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:26:34,210 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:26:37,669 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:26:37,669 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:26:44,214 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:26:52,828 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:26:52,828 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:26:56,219 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:26:57,895 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:27:06,224 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:27:07,986 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:27:07,986 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:27:16,228 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:27:23,163 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:27:23,163 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:27:27,954 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:27:28,233 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:27:38,237 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:27:38,326 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 
19:27:38,327 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:27:50,242 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:27:53,484 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:27:53,484 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:27:58,014 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:28:00,246 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:28:08,646 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:28:08,646 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:28:10,250 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:28:22,255 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:28:23,804 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:28:23,804 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:28:28,074 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:28:32,259 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:28:38,953 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status 
+2022-07-29 19:28:38,954 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:28:42,263 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:28:54,109 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:28:54,109 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:28:54,268 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:28:58,138 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:29:06,273 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:29:09,269 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:29:09,270 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:29:16,278 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:29:24,431 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:29:24,431 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:29:26,282 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:29:28,198 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:29:39,288 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:29:39,578 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:29:39,578 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:29:49,292 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:29:54,734 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:29:54,735 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:29:58,258 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:29:59,297 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:30:09,899 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:30:09,899 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:30:11,302 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:30:21,307 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:30:25,056 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:30:25,056 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:30:28,318 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:30:33,313 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:30:40,249 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:30:40,250 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:30:43,317 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:30:53,322 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:30:55,492 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:30:55,492 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:30:58,390 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:31:05,326 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:31:10,651 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:31:10,652 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:31:15,331 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:31:25,806 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:31:25,809 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:31:27,336 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:31:28,455 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:31:37,340 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:31:40,972 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:31:40,973 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:31:49,346 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:31:56,134 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:31:56,135 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:31:58,515 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:31:59,350 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:32:09,355 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:32:11,346 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:32:11,346 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:32:19,360 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:32:26,496 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:32:26,496 
DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:32:28,574 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:32:31,365 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:32:41,370 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:32:41,638 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:32:41,638 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:32:53,375 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:32:56,811 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:32:56,811 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:32:58,634 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:33:03,379 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:33:11,972 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:33:11,972 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:33:13,384 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:33:25,388 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:33:27,123 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:33:27,124 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:33:28,701 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:33:35,392 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:33:42,275 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:33:42,276 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:33:45,397 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:33:57,402 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:33:57,429 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:33:57,430 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:33:58,770 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:34:07,406 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:34:12,600 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:34:12,600 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:34:20,413 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:34:27,756 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:34:27,756 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:34:28,841 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:34:30,417 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:34:42,422 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:34:42,912 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:34:42,912 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:34:52,426 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:34:58,075 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:34:58,076 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:34:58,913 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:35:02,431 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:35:13,226 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:35:13,227 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:35:14,436 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:35:24,440 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:35:28,383 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:35:28,383 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:35:28,987 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:35:34,444 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:35:43,542 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:35:43,542 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:35:46,449 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:35:56,453 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:35:58,714 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:35:58,715 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:35:59,057 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:36:08,459 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:36:13,979 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status 
+2022-07-29 19:36:13,979 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:36:18,463 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:36:28,467 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:36:29,128 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:36:29,148 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:36:29,148 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:36:40,472 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:36:44,312 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:36:44,313 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:36:50,476 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:36:59,200 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:36:59,465 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:36:59,465 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:37:00,480 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:37:12,485 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:37:14,643 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:37:14,643 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:37:22,489 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:37:29,273 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:37:29,804 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:37:29,804 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:37:32,493 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:37:44,498 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:37:44,964 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:37:44,965 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:37:54,502 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:37:59,345 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:38:00,126 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:38:00,126 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:38:04,506 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:38:15,279 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:38:15,279 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:38:16,510 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:38:26,514 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:38:29,418 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:38:30,427 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:38:30,427 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:38:39,520 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:38:45,594 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:38:45,594 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:38:49,524 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:38:59,491 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:38:59,528 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:39:00,751 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 
19:39:00,751 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:39:11,533 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:39:15,913 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:39:15,914 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:39:21,537 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:39:29,562 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:39:31,068 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:39:31,069 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:39:33,542 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:39:43,546 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:39:46,227 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:39:46,227 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:39:55,551 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:39:59,623 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:40:01,377 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status 
+2022-07-29 19:40:01,377 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:40:05,556 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:40:15,560 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:40:16,535 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:40:16,536 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:40:27,565 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:40:29,684 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:40:31,696 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:40:31,696 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:40:37,570 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:40:46,840 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:40:46,840 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:40:47,574 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:40:59,578 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log 
+2022-07-29 19:40:59,743 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:41:02,009 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:41:02,009 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:41:09,582 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:41:17,160 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:41:17,161 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:41:21,587 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:41:29,803 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:41:31,592 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:41:32,309 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:41:32,309 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:41:41,596 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:41:47,463 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:41:47,464 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:41:53,601 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:41:59,863 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:42:02,621 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:42:02,622 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:42:03,606 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:42:13,611 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:42:17,782 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:42:17,782 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:42:25,616 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:42:29,922 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:42:32,921 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:42:32,921 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:42:35,621 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:42:47,626 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:42:48,103 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:42:48,104 
DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:42:57,630 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:42:59,994 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:43:03,255 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:43:03,255 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:43:08,634 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:43:18,404 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:43:18,404 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:43:20,639 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:43:30,065 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:43:30,643 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:43:33,560 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:43:33,561 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:43:40,647 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:43:48,717 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 
19:43:48,717 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:43:52,652 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:44:00,137 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:44:02,656 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:44:03,885 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:44:03,885 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:44:12,659 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:44:19,045 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:44:19,046 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:44:24,664 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:44:30,209 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:44:34,203 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:44:34,204 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:44:36,669 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:44:46,673 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:44:49,359 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:44:49,359 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:44:56,677 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:45:00,282 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:45:04,543 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:45:04,543 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:45:08,682 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:45:18,686 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:45:19,720 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:45:19,720 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:45:28,689 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:45:30,349 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:45:34,878 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:45:34,878 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:45:40,694 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:45:50,032 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:45:50,032 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:45:50,698 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:46:00,410 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:46:02,703 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:46:05,807 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:46:05,808 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:46:12,707 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:46:20,975 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:46:20,975 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:46:22,711 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:46:30,470 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:46:32,715 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:46:36,143 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 
19:46:36,144 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:46:44,720 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:46:51,295 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:46:51,296 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:46:54,724 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:47:00,530 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:47:06,449 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:47:06,449 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:47:06,729 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:47:16,733 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:47:21,601 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:47:21,601 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:47:26,737 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:47:30,591 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:47:36,757 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status 
+2022-07-29 19:47:36,758 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:47:38,742 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:47:49,746 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:47:51,906 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:47:51,907 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:48:00,651 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:48:01,751 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:48:07,056 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:48:07,056 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:48:11,755 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:48:21,759 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:48:22,252 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:48:22,252 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:48:30,723 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:48:33,763 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:48:37,402 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:48:37,403 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:48:43,767 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:48:52,561 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:48:52,562 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:48:55,772 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:49:00,798 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:49:05,776 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:49:07,712 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:49:07,712 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:49:15,781 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:49:22,869 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:49:22,869 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:49:27,786 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log 
+2022-07-29 19:49:30,870 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:49:37,790 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:49:38,023 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:49:38,024 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:49:49,795 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:49:53,232 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:49:53,233 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:49:59,800 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:50:00,942 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:50:08,381 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:50:08,381 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:50:11,805 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:50:21,809 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:50:23,542 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:50:23,542 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: 
stop_status +2022-07-29 19:50:31,002 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:50:31,813 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:50:38,698 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:50:38,699 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:50:43,817 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:50:53,822 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:50:53,852 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:50:53,852 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:51:01,062 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:51:05,827 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:51:08,999 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:51:09,000 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:51:15,831 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:51:24,152 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:51:24,152 DEBUG SenderThread:1749416 [sender.py:send_request():248] 
send_request: stop_status +2022-07-29 19:51:25,835 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:51:31,121 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:51:37,840 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:51:39,307 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:51:39,308 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:51:47,845 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:51:54,454 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:51:54,455 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:51:57,849 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:52:01,185 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:52:09,606 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:52:09,606 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:52:09,854 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:52:20,858 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log 
+2022-07-29 19:52:24,761 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:52:24,761 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:52:30,862 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:52:31,246 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:52:39,919 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:52:39,920 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:52:42,867 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:52:52,872 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:52:55,081 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:52:55,082 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:53:01,306 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:53:04,877 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:53:10,224 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:53:10,224 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:53:14,882 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:53:25,392 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:53:25,393 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:53:26,888 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:53:31,366 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:53:36,892 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:53:40,546 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:53:40,546 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:53:46,896 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:53:55,695 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:53:55,695 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:53:56,901 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:54:01,426 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:54:08,906 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:54:10,860 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:54:10,860 
DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:54:20,911 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:54:26,018 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:54:26,018 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:54:30,915 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:54:31,486 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:54:40,919 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:54:41,183 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:54:41,183 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:54:52,924 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:54:56,343 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:54:56,343 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:55:01,555 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:55:04,929 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:55:11,483 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 
19:55:11,483 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:55:14,933 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:55:24,937 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:55:26,633 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:55:26,634 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:55:31,614 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:55:36,942 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:55:41,793 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:55:41,793 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:55:46,946 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:55:56,953 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:55:56,953 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:55:58,951 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:56:03,778 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:56:08,955 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:56:16,952 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:56:16,952 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:56:19,960 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:56:31,735 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:56:31,965 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:56:32,110 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:56:32,110 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:56:41,970 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:56:47,269 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:56:47,269 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:56:51,974 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:57:01,798 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:57:02,440 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:57:02,441 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:57:03,980 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:57:13,984 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:57:17,596 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:57:17,596 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:57:25,989 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:57:31,858 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:57:32,762 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:57:32,762 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:57:35,994 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:57:45,998 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:57:47,947 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:57:47,948 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:57:58,003 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:58:01,918 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:58:03,105 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 
19:58:03,105 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:58:08,007 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:58:18,273 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:58:18,274 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:58:20,013 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:58:30,017 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:58:31,978 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:58:33,439 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:58:33,439 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:58:42,022 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:58:48,589 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:58:48,589 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:58:52,026 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:59:02,039 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:59:03,757 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status 
+2022-07-29 19:59:03,757 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:59:04,031 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:59:14,036 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:59:18,918 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:59:18,919 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:59:24,040 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:59:32,099 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 19:59:34,080 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:59:34,080 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:59:36,045 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:59:44,049 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 19:59:49,232 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 19:59:49,233 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 19:59:56,054 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log 
+2022-07-29 20:00:02,158 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:00:04,382 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:00:04,382 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:00:08,059 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:00:18,063 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:00:19,544 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:00:19,544 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:00:30,068 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:00:32,218 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:00:34,696 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:00:34,696 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:00:40,072 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:00:49,856 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:00:49,856 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:00:50,077 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:01:02,083 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:01:02,278 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:01:05,061 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:01:05,062 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:01:15,088 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:01:20,256 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:01:20,256 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:01:25,093 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:01:32,338 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:01:35,097 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:01:35,407 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:01:35,408 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:01:47,102 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:01:50,566 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:01:50,566 
DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:01:57,106 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:02:02,398 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:02:05,736 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:02:05,736 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:02:07,110 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:02:19,115 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:02:20,913 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:02:20,914 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:02:29,119 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:02:32,458 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:02:36,088 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:02:36,088 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:02:41,124 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:02:51,128 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:02:51,251 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:02:51,251 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:03:01,132 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:03:02,517 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:03:06,409 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:03:06,409 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:03:13,137 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:03:21,569 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:03:21,570 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:03:23,140 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:03:32,578 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:03:35,145 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:03:36,729 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:03:36,729 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:03:45,150 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:03:51,906 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:03:51,906 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:03:57,155 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:04:02,638 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:04:07,048 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:04:07,048 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:04:07,159 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:04:17,164 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:04:22,202 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:04:22,203 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:04:29,169 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:04:32,698 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:04:37,371 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:04:37,372 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:04:39,174 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:04:51,179 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:04:52,527 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:04:52,527 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:05:01,183 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:05:02,758 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:05:07,690 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:05:07,691 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:05:13,188 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:05:22,872 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:05:22,873 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:05:23,193 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:05:32,818 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:05:35,198 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:05:38,026 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status 
+2022-07-29 20:05:38,027 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:05:46,203 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:05:53,190 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:05:53,191 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:05:56,207 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:06:02,881 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:06:08,212 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:06:08,353 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:06:08,354 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:06:18,217 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:06:23,529 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:06:23,529 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:06:30,222 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:06:32,954 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:06:38,680 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-29 20:06:38,680 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:06:40,226 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:06:52,232 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:06:53,842 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:06:53,842 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:07:02,237 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:07:03,014 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:07:09,014 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:07:09,015 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:07:14,242 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:07:24,163 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:07:24,164 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:07:24,246 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:07:33,074 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:07:34,250 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:07:39,326 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:07:39,327 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:07:46,255 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:07:54,479 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:07:54,480 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:07:56,259 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:08:03,134 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:08:08,264 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:08:09,625 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:08:09,625 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:08:18,268 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:08:24,793 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:08:24,793 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:08:28,273 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:08:33,193 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:08:39,945 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:08:39,946 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:08:40,278 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:08:50,282 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:08:55,123 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:08:55,123 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:09:02,287 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:09:03,254 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:09:10,277 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:09:10,278 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:09:12,291 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:09:24,296 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:09:25,427 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:09:25,427 
DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:09:33,318 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:09:34,301 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:09:40,574 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:09:40,575 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:09:44,305 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:09:55,728 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:09:55,729 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:09:56,310 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:10:03,388 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:10:06,314 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:10:10,884 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:10:10,885 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:10:19,319 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:10:26,049 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 
20:10:26,050 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:10:29,324 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:10:33,454 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:10:41,202 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:10:41,202 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:10:41,329 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:10:51,333 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:10:56,359 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:10:56,360 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:11:03,338 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:11:03,514 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:11:11,544 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:11:11,544 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:11:13,342 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:11:25,347 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:11:26,696 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:11:26,696 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:11:33,574 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:11:35,352 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:11:41,848 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:11:41,848 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:11:45,356 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:11:56,996 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:11:56,997 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:11:57,362 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:12:03,634 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:12:07,366 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:12:12,163 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:12:12,163 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:12:19,371 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:12:27,312 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:12:27,312 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:12:29,376 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:12:33,693 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:12:41,381 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:12:42,462 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:12:42,462 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:12:51,386 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:12:57,618 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:12:57,618 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:13:03,391 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:13:03,754 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:13:12,777 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:13:12,777 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:13:13,395 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:13:23,399 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:13:27,938 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:13:27,939 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:13:33,813 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:13:35,404 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:13:43,093 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:13:43,093 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:13:45,409 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:13:57,414 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:13:58,247 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:13:58,248 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:14:03,873 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:14:07,418 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:14:13,400 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status 
+2022-07-29 20:14:13,401 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:14:17,422 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:14:28,557 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:14:28,557 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:14:30,428 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:14:33,934 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:14:40,432 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:14:43,731 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:14:43,731 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:14:50,436 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:14:58,884 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:14:58,885 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:15:02,441 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:15:04,001 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:15:12,445 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:15:14,035 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:15:14,035 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:15:24,451 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:15:29,203 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:15:29,203 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:15:34,062 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:15:34,455 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:15:44,364 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:15:44,364 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:15:46,460 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:15:56,464 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:15:59,523 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:15:59,523 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:16:04,121 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:16:06,468 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:16:14,686 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:16:14,686 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:16:18,473 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:16:28,478 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:16:29,992 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:16:29,993 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:16:34,182 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:16:40,483 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:16:45,147 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:16:45,148 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:16:50,488 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:17:00,286 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:17:00,286 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:17:00,492 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:17:04,242 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:17:12,496 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:17:15,450 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:17:15,451 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:17:22,501 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:17:30,605 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:17:30,606 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:17:34,302 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:17:34,506 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:17:44,510 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:17:45,772 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:17:45,773 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:17:56,515 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:18:00,928 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:18:00,929 
DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:18:04,363 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:18:06,520 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:18:16,083 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:18:16,083 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:18:16,524 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:18:28,530 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:18:31,233 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:18:31,233 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:18:34,423 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:18:38,534 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:18:46,379 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:18:46,380 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:18:51,539 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:01,535 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 
20:19:01,536 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:19:01,544 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:04,481 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:19:11,548 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:16,684 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:19:16,685 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:19:23,553 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:31,835 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:19:31,835 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:19:33,557 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:34,541 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:19:43,561 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:45,562 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:47,053 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:19:47,053 DEBUG 
SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:19:47,563 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:49,564 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:51,565 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:53,566 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:55,567 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:57,568 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:19:59,569 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:01,569 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:02,258 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:20:02,258 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:20:03,570 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 
20:20:04,612 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:20:05,571 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:07,572 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:09,573 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:11,574 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:13,575 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:15,575 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:17,397 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:20:17,398 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:20:17,576 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:19,577 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:21,578 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 
20:20:23,579 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:25,580 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:27,581 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:29,582 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:31,583 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:32,583 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:20:32,583 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:20:33,586 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:34,685 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:20:35,587 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:37,588 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:39,589 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 
20:20:41,590 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:43,591 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:45,592 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:47,593 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:47,755 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:20:47,755 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:20:49,594 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:51,594 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:53,596 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:55,597 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:57,598 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:20:59,599 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:01,600 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:02,922 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:21:02,923 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:21:04,601 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:04,759 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:21:06,602 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:08,603 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:10,604 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:12,605 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:14,606 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:16,607 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:18,080 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-29 20:21:18,080 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:21:18,608 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:20,609 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:22,610 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:24,611 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:26,612 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:28,613 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:30,614 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:32,615 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:33,261 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:21:33,261 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:21:34,616 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:34,833 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:21:36,617 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:38,618 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:40,619 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:42,619 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:44,620 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:46,621 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:48,428 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:21:48,428 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:21:48,622 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:50,623 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:52,624 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:54,625 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:21:58,627 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:22:00,628 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:22:02,629 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:22:03,563 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:22:03,563 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:22:04,630 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:22:04,909 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:22:06,631 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:22:08,632 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:22:10,633 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:22:12,634 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:22:18,733 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:22:18,733 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:22:24,639 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:22:33,895 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:22:33,895 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:22:34,970 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:22:36,645 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:22:48,650 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:22:49,056 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:22:49,056 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:22:58,654 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:23:04,213 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:23:04,213 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:23:05,030 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:23:10,660 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:23:19,375 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:23:19,376 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:23:22,665 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:23:34,556 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:23:34,556 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:23:34,670 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:23:35,090 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:23:44,674 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:23:49,712 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:23:49,712 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:23:58,680 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:24:04,883 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:24:04,884 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:24:05,150 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:24:11,685 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:24:20,037 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:24:20,038 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:24:21,689 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:24:33,694 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:24:35,210 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:24:35,217 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:24:35,217 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:24:45,699 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:24:50,378 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:24:50,378 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:24:57,704 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:25:05,271 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:25:05,544 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:25:05,544 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:25:07,709 INFO Thread-8 :1749416 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:25:19,714 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:25:20,689 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:25:20,690 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:25:31,719 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:25:35,331 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:25:35,850 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:25:35,850 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:25:43,724 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:25:50,998 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:25:50,998 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:25:55,729 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:26:05,394 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:26:06,161 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:26:06,162 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:26:07,734 INFO Thread-8 
:1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:26:19,739 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:26:21,310 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:26:21,311 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:26:31,744 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:26:35,454 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:26:36,478 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:26:36,479 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:26:43,749 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:26:51,624 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:26:51,624 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:26:55,754 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:27:05,514 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:27:07,015 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:27:07,015 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:27:07,759 INFO 
Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:27:19,764 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:27:22,246 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:27:22,246 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:27:31,769 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:27:35,576 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:27:37,409 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:27:37,409 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:27:41,773 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:27:52,578 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:27:52,578 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:27:53,778 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:28:05,637 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:28:05,783 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:28:07,725 DEBUG HandlerThread:1749416 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:28:07,725 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:28:17,788 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:28:22,867 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:28:22,867 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:28:27,792 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:28:35,698 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:28:38,034 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:28:38,034 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:28:39,797 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:28:52,803 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:28:53,182 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:28:53,182 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:29:02,807 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:29:05,758 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:29:08,363 DEBUG 
HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:29:08,364 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:29:14,812 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:29:23,507 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:29:23,508 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:29:26,817 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:29:35,818 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:29:38,664 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:29:38,664 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:29:38,822 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:29:48,826 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:29:53,809 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:29:53,809 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:30:00,832 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:30:05,878 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:30:08,975 
DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:30:08,975 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:30:12,837 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:30:24,131 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:30:24,131 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:30:24,843 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:30:35,939 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:30:36,848 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:30:39,304 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:30:39,305 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:30:48,853 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:30:54,448 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:30:54,448 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:31:00,858 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:31:05,999 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 
20:31:09,602 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:31:09,603 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:31:12,864 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:31:24,761 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:31:24,761 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:31:24,869 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:31:34,873 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:31:36,058 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:31:39,922 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:31:39,922 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:31:46,878 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:31:55,103 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:31:55,103 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:31:58,884 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:32:06,121 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats 
+2022-07-29 20:32:10,271 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:32:10,271 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:32:10,889 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:32:14,891 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:32:25,423 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:32:25,423 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:32:26,896 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:32:36,181 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:32:36,900 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:32:40,574 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:32:40,575 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:32:48,905 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:32:55,730 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:32:55,730 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:33:00,910 INFO Thread-8 :1749416 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:33:06,242 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:33:10,882 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:33:10,882 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:33:12,915 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:33:22,919 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:33:26,031 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:33:26,032 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:33:33,924 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:33:36,302 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:33:41,181 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:33:41,181 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:33:45,929 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:33:56,333 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:33:56,333 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:33:57,934 INFO Thread-8 
:1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:34:06,362 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:34:09,940 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:34:11,532 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:34:11,532 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:34:19,944 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:34:26,679 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:34:26,680 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:34:31,949 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:34:36,423 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:34:41,850 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:34:41,850 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:34:43,954 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:34:47,956 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:34:49,957 INFO Thread-8 :1749416 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:34:51,958 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:34:53,959 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:34:55,960 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:34:56,988 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:34:56,988 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:34:57,961 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:34:59,962 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:35:01,963 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:35:03,964 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:35:05,965 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:35:06,494 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:35:11,968 INFO Thread-8 :1749416 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:35:12,137 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 20:35:12,137 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: stop_status +2022-07-29 20:35:13,968 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:35:15,969 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:35:17,970 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:35:18,373 DEBUG SenderThread:1749416 [sender.py:send():234] send: telemetry +2022-07-29 20:35:18,373 DEBUG SenderThread:1749416 [sender.py:send():234] send: exit +2022-07-29 20:35:18,373 INFO SenderThread:1749416 [sender.py:send_exit():366] handling exit code: 1 +2022-07-29 20:35:18,375 INFO SenderThread:1749416 [sender.py:send_exit():368] handling runtime: 4912 +2022-07-29 20:35:18,376 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 20:35:18,376 INFO SenderThread:1749416 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 20:35:18,377 INFO SenderThread:1749416 [sender.py:send_exit():374] send defer +2022-07-29 20:35:18,377 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 20:35:18,378 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: defer +2022-07-29 20:35:18,378 INFO HandlerThread:1749416 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-29 20:35:18,378 DEBUG 
SenderThread:1749416 [sender.py:send_request():248] send_request: defer +2022-07-29 20:35:18,378 INFO SenderThread:1749416 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-29 20:35:18,378 INFO SenderThread:1749416 [sender.py:transition_state():387] send defer: 1 +2022-07-29 20:35:18,379 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: defer +2022-07-29 20:35:18,379 INFO HandlerThread:1749416 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-29 20:35:18,424 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: defer +2022-07-29 20:35:18,424 INFO SenderThread:1749416 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-29 20:35:18,424 INFO SenderThread:1749416 [sender.py:transition_state():387] send defer: 2 +2022-07-29 20:35:18,424 DEBUG SenderThread:1749416 [sender.py:send():234] send: stats +2022-07-29 20:35:18,424 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: defer +2022-07-29 20:35:18,424 INFO HandlerThread:1749416 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-29 20:35:18,425 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: defer +2022-07-29 20:35:18,425 INFO SenderThread:1749416 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-29 20:35:18,425 INFO SenderThread:1749416 [sender.py:transition_state():387] send defer: 3 +2022-07-29 20:35:18,425 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: defer +2022-07-29 20:35:18,425 INFO HandlerThread:1749416 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-29 20:35:18,425 DEBUG SenderThread:1749416 [sender.py:send():234] send: summary +2022-07-29 20:35:18,426 INFO SenderThread:1749416 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 20:35:18,426 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: defer +2022-07-29 20:35:18,426 INFO 
SenderThread:1749416 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-29 20:35:18,426 INFO SenderThread:1749416 [sender.py:transition_state():387] send defer: 4 +2022-07-29 20:35:18,426 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: defer +2022-07-29 20:35:18,426 INFO HandlerThread:1749416 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-29 20:35:18,426 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: defer +2022-07-29 20:35:18,426 INFO SenderThread:1749416 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-29 20:35:18,479 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 20:35:18,605 INFO SenderThread:1749416 [sender.py:transition_state():387] send defer: 5 +2022-07-29 20:35:18,605 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 20:35:18,606 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: defer +2022-07-29 20:35:18,606 INFO HandlerThread:1749416 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-29 20:35:18,606 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: defer +2022-07-29 20:35:18,606 INFO SenderThread:1749416 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-29 20:35:18,606 INFO SenderThread:1749416 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-29 20:35:18,707 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 20:35:18,971 INFO Thread-8 :1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/config.yaml +2022-07-29 20:35:18,971 INFO SenderThread:1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:35:18,971 INFO 
SenderThread:1749416 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/wandb-summary.json +2022-07-29 20:35:18,972 INFO SenderThread:1749416 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files +2022-07-29 20:35:18,972 INFO SenderThread:1749416 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/config.yaml config.yaml +2022-07-29 20:35:18,972 INFO SenderThread:1749416 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/requirements.txt requirements.txt +2022-07-29 20:35:18,972 INFO SenderThread:1749416 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log output.log +2022-07-29 20:35:18,973 INFO SenderThread:1749416 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/wandb-summary.json wandb-summary.json +2022-07-29 20:35:18,973 INFO SenderThread:1749416 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/wandb-metadata.json wandb-metadata.json +2022-07-29 20:35:18,976 INFO SenderThread:1749416 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-29 20:35:18,976 INFO SenderThread:1749416 [sender.py:transition_state():387] send defer: 6 +2022-07-29 20:35:18,976 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 20:35:18,982 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: defer +2022-07-29 20:35:18,982 INFO HandlerThread:1749416 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-29 20:35:18,982 DEBUG SenderThread:1749416 
[sender.py:send_request():248] send_request: defer +2022-07-29 20:35:18,982 INFO SenderThread:1749416 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-29 20:35:18,982 INFO SenderThread:1749416 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 20:35:19,081 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 20:35:19,081 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 20:35:19,182 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 20:35:19,183 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 20:35:19,284 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 20:35:19,284 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 20:35:19,386 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 20:35:19,386 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 20:35:19,464 INFO Thread-14 :1749416 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/requirements.txt +2022-07-29 20:35:19,466 INFO Thread-15 :1749416 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/output.log +2022-07-29 20:35:19,487 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 20:35:19,488 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 20:35:19,543 INFO Thread-13 :1749416 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/config.yaml +2022-07-29 20:35:19,584 INFO Thread-16 :1749416 [upload_job.py:push():137] Uploaded file 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/files/wandb-summary.json +2022-07-29 20:35:19,589 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 20:35:19,589 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 20:35:19,691 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 20:35:19,691 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 20:35:19,784 INFO Thread-7 :1749416 [sender.py:transition_state():387] send defer: 7 +2022-07-29 20:35:19,785 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: defer +2022-07-29 20:35:19,785 INFO HandlerThread:1749416 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-29 20:35:19,785 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: defer +2022-07-29 20:35:19,785 INFO SenderThread:1749416 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-29 20:35:19,792 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 20:35:20,263 INFO SenderThread:1749416 [sender.py:transition_state():387] send defer: 8 +2022-07-29 20:35:20,263 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 20:35:20,264 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: defer +2022-07-29 20:35:20,264 INFO HandlerThread:1749416 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-29 20:35:20,264 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: defer +2022-07-29 20:35:20,264 INFO SenderThread:1749416 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-29 20:35:20,264 INFO SenderThread:1749416 [sender.py:transition_state():387] send defer: 9 +2022-07-29 20:35:20,265 DEBUG SenderThread:1749416 [sender.py:send():234] send: final 
+2022-07-29 20:35:20,265 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: defer +2022-07-29 20:35:20,265 INFO HandlerThread:1749416 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-29 20:35:20,265 DEBUG SenderThread:1749416 [sender.py:send():234] send: footer +2022-07-29 20:35:20,265 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: defer +2022-07-29 20:35:20,265 INFO SenderThread:1749416 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-29 20:35:20,365 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 20:35:20,365 DEBUG SenderThread:1749416 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 20:35:20,366 INFO SenderThread:1749416 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 20:35:20,624 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: get_summary +2022-07-29 20:35:20,625 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-29 20:35:20,625 DEBUG HandlerThread:1749416 [handler.py:handle_request():130] handle_request: shutdown +2022-07-29 20:35:20,625 INFO HandlerThread:1749416 [handler.py:finish():731] shutting down handler +2022-07-29 20:35:21,266 INFO WriterThread:1749416 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/run-ovnz8vs0.wandb +2022-07-29 20:35:21,623 INFO SenderThread:1749416 [sender.py:finish():1070] shutting down sender +2022-07-29 20:35:21,623 INFO SenderThread:1749416 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 20:35:21,623 INFO SenderThread:1749416 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 20:35:21,626 INFO MainThread:1749416 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220729_191324-ovnz8vs0/logs/debug.log b/wandb/run-20220729_191324-ovnz8vs0/logs/debug.log new file mode 
100644 index 0000000000000000000000000000000000000000..ec544f004b267b794570dff767a374deb6742558 --- /dev/null +++ b/wandb/run-20220729_191324-ovnz8vs0/logs/debug.log @@ -0,0 +1,139 @@ +2022-07-29 19:13:24,832 INFO MainThread:1748149 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-29 19:13:24,833 INFO MainThread:1748149 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-29 19:13:24,833 INFO MainThread:1748149 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/logs/debug.log +2022-07-29 19:13:24,833 INFO MainThread:1748149 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_191324-ovnz8vs0/logs/debug-internal.log +2022-07-29 19:13:24,833 INFO MainThread:1748149 [wandb_init.py:init():404] calling init triggers +2022-07-29 19:13:24,833 INFO MainThread:1748149 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-29 19:13:24,833 INFO MainThread:1748149 [wandb_init.py:init():460] starting backend +2022-07-29 19:13:24,833 INFO MainThread:1748149 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-29 19:13:24,861 INFO MainThread:1748149 [backend.py:ensure_launched():216] starting backend process... +2022-07-29 19:13:24,887 INFO MainThread:1748149 [backend.py:ensure_launched():221] started backend process with pid: 1749416 +2022-07-29 19:13:24,890 INFO MainThread:1748149 [wandb_init.py:init():469] backend started and connected +2022-07-29 19:13:24,904 INFO MainThread:1748149 [wandb_init.py:init():533] updated telemetry +2022-07-29 19:13:24,968 INFO MainThread:1748149 [wandb_init.py:init():563] communicating current version +2022-07-29 19:13:25,686 INFO MainThread:1748149 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! 
To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-29 19:13:25,687 INFO MainThread:1748149 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-29 19:13:25,860 INFO MainThread:1748149 [wandb_init.py:init():606] starting run threads in backend +2022-07-29 19:13:28,230 INFO MainThread:1748149 [wandb_run.py:_console_start():1810] atexit reg +2022-07-29 19:13:28,231 INFO MainThread:1748149 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-29 19:13:28,231 INFO MainThread:1748149 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-29 19:13:28,233 INFO MainThread:1748149 [wandb_run.py:_redirect():1745] Redirects installed. +2022-07-29 19:13:28,234 INFO MainThread:1748149 [wandb_init.py:init():633] run started, returning control to user process +2022-07-29 20:35:15,919 INFO MainThread:1748149 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-29 20:35:15,924 INFO MainThread:1748149 [wandb_run.py:_restore():1752] restore +2022-07-29 20:35:18,378 INFO MainThread:1748149 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73703 + total_bytes: 73703 +} + +2022-07-29 20:35:18,606 INFO MainThread:1748149 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73703 + total_bytes: 73703 +} + +2022-07-29 20:35:18,980 INFO MainThread:1748149 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73703 + total_bytes: 106250 +} + +2022-07-29 20:35:19,081 INFO MainThread:1748149 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73703 + total_bytes: 106250 +} + +2022-07-29 20:35:19,183 INFO MainThread:1748149 [wandb_run.py:_wait_for_finish():1912] got 
exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 106221 + total_bytes: 106250 +} + +2022-07-29 20:35:19,285 INFO MainThread:1748149 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 106250 + total_bytes: 106250 +} + +2022-07-29 20:35:19,387 INFO MainThread:1748149 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 106250 + total_bytes: 106250 +} + +2022-07-29 20:35:19,488 INFO MainThread:1748149 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 106250 + total_bytes: 106250 +} + +2022-07-29 20:35:19,590 INFO MainThread:1748149 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 106250 + total_bytes: 106250 +} + +2022-07-29 20:35:19,692 INFO MainThread:1748149 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 106250 + total_bytes: 106250 +} + +2022-07-29 20:35:20,264 INFO MainThread:1748149 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 106250 + total_bytes: 106250 +} + +2022-07-29 20:35:20,623 INFO MainThread:1748149 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 106250 + total_bytes: 106250 +} +local_info { +} + +2022-07-29 20:35:22,185 INFO MainThread:1748149 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220729_191324-ovnz8vs0/run-ovnz8vs0.wandb b/wandb/run-20220729_191324-ovnz8vs0/run-ovnz8vs0.wandb new file mode 100644 index 
0000000000000000000000000000000000000000..271c3bcc64380e651b07d8ebb9d19754b8ffe51f --- /dev/null +++ b/wandb/run-20220729_191324-ovnz8vs0/run-ovnz8vs0.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10a1f120fdad4cd704fa8fa782f094f951375748df0a0368ade5826d6e49d2a1 +size 351403 diff --git a/wandb/run-20220729_213705-23c375az/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220729_213705-23c375az/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..125deac0a5e7358200646f43d7adfd25d2bbcc3e --- /dev/null +++ b/wandb/run-20220729_213705-23c375az/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1597 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + return False # 
Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map(map_nst).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map(map_npsc).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", "audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", 
"test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise ValueError( 
+ "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_213705-23c375az/files/config.yaml b/wandb/run-20220729_213705-23c375az/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c1e2e3414ada42d2d59f6f5ff278f2eb4e6c8f5 --- /dev/null +++ b/wandb/run-20220729_213705-23c375az/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659130625 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_213705-23c375az/files/output.log b/wandb/run-20220729_213705-23c375az/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..321e4f8373aff394413cdc761e1b615793b921cd --- /dev/null +++ b/wandb/run-20220729_213705-23c375az/files/output.log @@ -0,0 +1,891 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_21-37-01_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=128, +per_device_train_batch_size=128, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, 
+save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 81.71it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 468.32it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a479b8802b3f5567.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b93c3063e1e3e193.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-153fb89ac3bac9ae.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2f20a68a38894f5b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-92f96a9049447122.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c1954d791f874a7f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c645a2a40522c3f0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24cfb6035bbadcbb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-09d11f511da96fdf.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8c47b02ac891d8ec.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-aac9470589015a7c.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-474509ecbe190df5.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + "contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + 
"num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at /home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/vocab.json from cache at /home/javierr/.cache/huggingface/transformers/dbb47708b30c45561aee53d4cc808c3ed6c4785df1e2b757c8852837f6f58537.cd4a163fb068cd596a09687d4b1d7e345279d1497fdf547b3c986f4d6ee855b8 +loading file https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/tokenizer_config.json from cache at /home/javierr/.cache/huggingface/transformers/32ac0765a83a8c538f4d3d8f104f69eab9648cb4d3e3a6bb6463185512f204d4.ff272c14a3ca512c9d9f85a898e6a086e8f790b06ff540f484000dab53132349 +loading file https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/added_tokens.json from cache at /home/javierr/.cache/huggingface/transformers/a86e893e88bb40472d24ffc4e2be032578bb14755e88bb1e961bad6276161a17.23a5f4e269f91a88f81d8cb3c6f881c8eb30ad98b9c9691569d4cfb87512d722 +loading file 
https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/special_tokens_map.json from cache at /home/javierr/.cache/huggingface/transformers/c0891e9bf7d08f1e6d4c0d7da3104155480b7f65b84d2e1ef0506f017fd60443.2fce57d7d6ae62285f9966753bbdba194f7ff904d5f380e3bb6a6242819f9a93 +Adding to the vocabulary +Adding to the vocabulary +https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin not found in cache or force_download set to True, downloading to /home/javierr/.cache/huggingface/transformers/tmpcl_om3tp + + + + + + + + + + + + + + + + + + + + + + + +Downloading pytorch_model.bin: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.60G/3.60G [00:47<00:00, 82.1MB/s] +storing https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin in cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +creating metadata file for /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. 
+Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_q', 'bias'), ('quantizer', 'weight_proj', 'kernel'), ('project_hid', 'kernel'), ('quantizer', 'codevectors'), ('project_q', 'kernel'), ('quantizer', 'weight_proj', 'bias'), ('project_hid', 'bias')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). +Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'kernel'), ('lm_head', 'bias')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. 
+ param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7733.71ex/s] +removing punctuation from train split #3: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8335.60ex/s] +removing punctuation from train split #1: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7811.68ex/s] +removing punctuation from train split #4: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8176.30ex/s] +removing punctuation from train split #5: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8055.27ex/s] +removing punctuation from train split #6: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7889.70ex/s] +removing punctuation from train split #2: 
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 6976.44ex/s] +removing punctuation from train split #8: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7969.58ex/s] +removing punctuation from train split #7: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7694.00ex/s] +removing punctuation from train split #10: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7986.11ex/s] +removing punctuation from train split #7: 86%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 8234/9523 [00:01<00:00, 6491.17ex/s] +removing punctuation from train split #6: 97%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 9284/9523 [00:01<00:00, 7224.41ex/s] +removing punctuation from train split #7: 96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 9138/9523 [00:01<00:00, 7105.42ex/s] +removing punctuation from train split #8: 
97%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 9233/9523 [00:01<00:00, 7266.53ex/s] +removing punctuation from train split #10: 85%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8060/9523 [00:01<00:00, 7184.65ex/s] +removing punctuation from train split #15: 54%|██████████████████████████████████████████████████████████████████████████████████████████████████████████ | 5153/9522 [00:00<00:00, 5635.98ex/s] +removing punctuation from train split #11: 85%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8124/9523 [00:01<00:00, 7121.05ex/s] +removing punctuation from train split #10: 94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8916/9523 [00:01<00:00, 7542.07ex/s] +removing punctuation from train split #9: 92%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8750/9523 [00:01<00:00, 6412.77ex/s] +removing punctuation from train split #12: 87%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8283/9522 [00:01<00:00, 7452.15ex/s] +removing punctuation from train split #15: 73%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 6983/9522 [00:00<00:00, 7157.87ex/s] +removing punctuation from 
train split #16: 65%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 6219/9522 [00:00<00:00, 7162.74ex/s] +removing punctuation from train split #17: 66%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 6237/9522 [00:00<00:00, 7167.32ex/s] +removing punctuation from train split #13: 76%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 7210/9522 [00:01<00:00, 6207.16ex/s] +removing punctuation from train split #20: 47%|███████████████████████████████████████████████████████████████████████████████████████████▊ | 4458/9522 [00:00<00:00, 6375.22ex/s] +removing punctuation from train split #15: 83%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 7894/9522 [00:01<00:00, 7677.64ex/s] +removing punctuation from train split #16: 75%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 7162/9522 [00:00<00:00, 7771.07ex/s] +removing punctuation from train split #17: 75%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 7179/9522 [00:00<00:00, 7769.81ex/s] +removing punctuation from train split #25: 0%| | 0/9522 [00:00=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
        return cls(
            step=0,
            apply_fn=apply_fn,
            params=params,
            tx=tx,
            opt_state=opt_state,
            **kwargs,
        )

    def replicate(self):
        # Replicate the state across local devices; each device receives its own dropout PRNG key.
        return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng))


@flax.struct.dataclass
class FlaxDataCollatorSpeechSeq2SeqWithPadding:
    """
    Data collator that will dynamically pad the inputs received.
    Args:
        processor ([`Wav2Vec2Processor`])
            The processor used for processing the data (feature extractor for the audio
            inputs, tokenizer for the labels).
        input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`"longest"`):
            Select a strategy to pad the returned input sequences (according to the model's padding side and padding index)
            among:
            * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
              sequence if provided).
            * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the
              maximum acceptable input length for the model if that argument is not provided.
            * :obj:`False` or :obj:`'do_not_pad'`: No padding (i.e., can output a batch with sequences of
              different lengths).
        label_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`"max_length"`):
            Select a strategy to pad the returned label sequences (according to the tokenizer's padding side and
            padding index). See above for details.
        max_input_length (:obj:`float`, `optional`):
            Maximum length of the ``input_values`` of the returned list and optionally padding length (see above).
        max_label_length (:obj:`float`, `optional`):
            Maximum length of the ``labels`` of the returned list and optionally padding length (see above).
        pad_input_to_multiple_of (:obj:`int`, `optional`):
            If set will pad the input sequence to a multiple of the provided value.
            This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >=
            7.5 (Volta).
        pad_to_multiple_of_label (:obj:`int`, `optional`):
            If set will pad the label sequence to a multiple of the provided value.
            See above for details.
    """

    processor: Any
    input_padding: Union[bool, str] = "longest"
    label_padding: Union[bool, str] = "max_length"
    pad_input_to_multiple_of: Optional[int] = None
    pad_to_multiple_of_label: Optional[int] = None
    max_input_length: Optional[float] = None
    max_label_length: Optional[float] = None

    def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]:
        # split inputs and labels since they have to be of different lengths and need
        # different padding methods
        input_features = [{"input_values": feature["input_values"]} for feature in features]
        label_features = [{"input_ids": feature["labels"]} for feature in features]

        # reformat list to dict and set to numpy format (return_tensors="np")
        batch = self.processor.feature_extractor.pad(
            input_features,
            max_length=self.max_input_length,
            padding=self.input_padding,
            pad_to_multiple_of=self.pad_input_to_multiple_of,
            return_tensors="np",
        )

        labels_batch = self.processor.tokenizer.pad(
            label_features,
            max_length=self.max_label_length,
            padding=self.label_padding,
            pad_to_multiple_of=self.pad_to_multiple_of_label,
            return_tensors="np",
        )

        # mask out padded label positions with -100 so they are ignored downstream
        # (ctc_loss treats negative label ids as padding)
        labels = labels_batch["input_ids"]
        labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1))
        labels = labels.filled(fill_value=-100)

        batch["labels"] = labels

        return batch


def get_grouped_indices(
    dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None
) -> np.array:
    """
    Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486)
    Function that returns a list of
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar
    lengths. To do this, the indices are:

    - randomly permuted (if a JAX rng is specified)
    - grouped in mega-batches of size `mega_batch_mult * batch_size`
    - sorted by length in each mega-batch

    The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of
    maximum length placed first, so that an OOM happens sooner rather than later.
    """
    lengths = dataset["input_length"]

    # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller.
    if mega_batch_mult is None:
        mega_batch_mult = min(len(lengths) // (batch_size * 4), 50)
        # Just in case, for tiny datasets
        if mega_batch_mult == 0:
            mega_batch_mult = 1

    # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler.
    num_samples = len(lengths)
    indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples)

    megabatch_size = mega_batch_mult * batch_size
    megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)]
    megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches]

    # The rest is to get the biggest batch first.
    # Since each megabatch is sorted by descending length, the longest element is the first
    megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches]
    max_idx = np.argmax(megabatch_maximums).item()
    # Switch to put the longest batch in first position
    # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch)
    megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0]

    # flatten the list of megabatches into a single 1-D array of indices
    megabatches = np.array([i for megabatch in megabatches for i in megabatch])

    return megabatches


def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray:
    """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by
    the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned."""
    num_samples = len(samples_idx)
    if drop_last:
        # drop the tail that does not fill a complete batch, then reshape to (num_batches, batch_size)
        samples_to_remove = num_samples % batch_size
        if samples_to_remove != 0:
            samples_idx = samples_idx[:-samples_to_remove]
        sections_split = num_samples // batch_size
        samples_idx = samples_idx.reshape((sections_split, batch_size))
    else:
        # keep the last (possibly smaller) batch: np.array_split returns a list of arrays
        sections_split = math.ceil(num_samples / batch_size)
        samples_idx = np.array_split(samples_idx, sections_split)
    return samples_idx


def write_train_metric(summary_writer, train_metrics, train_time, step):
    # Log cumulative training time plus every accumulated per-step metric to TensorBoard.
    summary_writer.scalar("train_time", train_time, step)

    train_metrics = get_metrics(train_metrics)
    for key, vals in train_metrics.items():
        tag = f"train_{key}"
        for i, val in enumerate(vals):
            # back-fill one scalar per accumulated step so the x-axis stays in step units
            summary_writer.scalar(tag, val, step - len(vals) + i + 1)


def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None):
    # Log each eval metric to TensorBoard under an `eval_` prefix.
    for metric_name, value in eval_metrics.items():
        summary_writer.scalar(f"eval_{metric_name}", value, step)

    if pred_str is not None:
        # write output actual predictions for debugging
        summary_writer.text("eval_predictions",
"\n".join(pred_str), step)


def write_wandb_log(metrics, step, prefix=None):
    """Log a dict of scalar metrics to Weights & Biases (main process only)."""
    if jax.process_index() == 0:
        log_metrics = {}
        for k, v in metrics.items():
            if "layer" in k:
                # trailing slash groups all per-layer metrics into their own wandb panel section
                log_metrics[f"{k}/"] = v
            elif prefix is not None:
                log_metrics[f"{prefix}/{k}"] = v
            else:
                log_metrics[k] = v
        wandb.log(log_metrics, step)


def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"):
    """Log the first `num_log` (label, prediction) string pairs as a wandb table (main process only)."""
    if jax.process_index() == 0:
        # convert str data to a wandb compatible format
        str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))]
        # we'll log the first 50 predictions for each epoch
        wandb.log(
            {
                f"{prefix}/step_{int(step / 1000)}k": wandb.Table(
                    columns=["label_str", "pred_str"], data=str_data[:num_log]
                )
            },
            step,
        )


def create_learning_rate_fn(
    num_train_steps: int, num_warmup_steps: int, learning_rate: float
) -> Callable[[int], jnp.array]:
    """Returns a linear warmup, linear_decay learning rate function."""
    warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps)
    decay_fn = optax.linear_schedule(
        init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps
    )
    schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps])
    return schedule_fn


def ctc_loss(
    logits,
    logits_attention_mask,
    labels,
    blank_id,
    loss_reduction="mean",
    output_emission_dict=False,
    log_epsilon=-100000.0,
):
    """Computes CTC loss.
    This function performs forward computation over an FSA with `N * 2` states
    where `N` is the max number of labels. The states are split into two groups:
    Phi states and emission states. a phi-state accepts repetition of
    phi (blank)-symbols and transits to emission state when the correct label is
    observed. An emission state accepts repetition of the label and transits to
    the next phi states at any time (so called epsilon-transition).
    Below, `B` denotes the batch size, `T` denotes the time steps in `logits`,
    and `N` denotes the time steps in `labels`.
    Args:
        logits: (B, T, K)-array containing log-probabilities of each class.
        logits_attention_mask: (B, T)-array. Attention mask for `logits`; positions
            where the mask is 0 are treated as padding.
        labels: (B, N)-array containing reference integer labels. `labels` must be
            right-padded with -100, i.e. each row is a run of label ids followed by
            a (possibly empty) run of -100 entries.
        blank_id: Id for blank token.
        loss_reduction: one of "mean", "sum", "none"
            - "none": no reduction is applied.
            - "mean": output loss will be divided by target lengths and then the
              mean over the batch is taken.
            - "sum": output loss are summed over batch
        output_emission_dict: whether to output additional information about the emission probs
    Returns:
        The reduced loss when `output_emission_dict` is False; otherwise a pair
        `(loss, aux)`.
        aux: Dictionary containing interim variables used for computing losses.
        aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each
            phi-state corresponding to the n-th label.
        aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each
            emission-state corresponding to the n-th label.
        aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol
            corresponding to each time frame.
        aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label
            corresponding to each time frame.
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_215007-14bu0ptz/files/config.yaml b/wandb/run-20220729_215007-14bu0ptz/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..15f01565667c96392530efd33dfc36216286b866 --- /dev/null +++ b/wandb/run-20220729_215007-14bu0ptz/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659131407 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_215007-14bu0ptz/files/output.log b/wandb/run-20220729_215007-14bu0ptz/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..e7038d36ff2897c0eec18bc8aa2a6161bd6a87f0 --- /dev/null +++ b/wandb/run-20220729_215007-14bu0ptz/files/output.log @@ -0,0 +1,1045 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_21-50-02_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=128, +per_device_train_batch_size=128, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, 
+save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 80.41it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 464.19it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +filtering NST #0: 46%|██████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 3686/7930 [00:01<00:01, 2403.10ex/s] +filtering NST #1: 39%|██████████████████████████████████████████████████████████████████████████████████████ | 3086/7930 [00:01<00:02, 2093.09ex/s] +filtering NST #2: 36%|███████████████████████████████████████████████████████████████████████████████ | 2836/7930 [00:01<00:02, 2291.19ex/s] +filtering NST #3: 28%|█████████████████████████████████████████████████████████████▋ | 2214/7930 [00:01<00:04, 1285.27ex/s] +filtering NST #4: 27%|████████████████████████████████████████████████████████████▏ | 2161/7930 [00:01<00:05, 1145.05ex/s] +filtering NST #5: 28%|█████████████████████████████████████████████████████████████▏ | 2194/7930 [00:01<00:04, 1166.55ex/s] +filtering NST #6: 28%|█████████████████████████████████████████████████████████████ | 2190/7930 [00:01<00:04, 1205.53ex/s] +filtering NST #7: 41%|███████████████████████████████████████████████████████████████████████████████████████████▍ | 3282/7930 [00:01<00:02, 1928.98ex/s] +filtering NST #8: 50%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 3997/7930 [00:01<00:01, 2536.44ex/s] +filtering NST #9: 48%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 3811/7930 [00:01<00:01, 2422.65ex/s] +filtering NST #10: 33%|████████████████████████████████████████████████████████████████████████▉ | 2628/7930 [00:01<00:02, 2183.23ex/s] 
+filtering NST #11: 27%|██████████████████████████████████████████████████████████▉ | 2124/7930 [00:01<00:05, 1116.97ex/s] +filtering NST #12: 25%|██████████████████████████████████████████████████████▊ | 1974/7930 [00:01<00:04, 1323.00ex/s] +filtering NST #13: 25%|██████████████████████████████████████████████████████▉ | 1979/7929 [00:01<00:04, 1318.92ex/s] +filtering NST #14: 41%|█████████████████████████████████████████████████████████████████████████████████████████▉ | 3240/7929 [00:01<00:02, 2041.53ex/s] +filtering NST #15: 25%|███████████████████████████████████████████████████████▏ | 1989/7929 [00:01<00:04, 1328.70ex/s] +filtering NST #16: 46%|█████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 3656/7929 [00:01<00:01, 2319.68ex/s] +filtering NST #17: 45%|███████████████████████████████████████████████████████████████████████████████████████████████████▏ | 3573/7929 [00:01<00:01, 2290.56ex/s] +filtering NST #18: 26%|█████████████████████████████████████████████████████████▍ | 2072/7929 [00:01<00:05, 1108.22ex/s] +filtering NST #19: 46%|█████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 3674/7929 [00:01<00:01, 2345.95ex/s] +filtering NST #20: 47%|██████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 3707/7929 [00:01<00:01, 2384.67ex/s] +filtering NST #21: 46%|████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 3639/7929 [00:01<00:01, 2341.27ex/s] +filtering NST #22: 47%|███████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 3726/7929 [00:01<00:01, 2398.60ex/s] +filtering NST #23: 47%|███████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 3729/7929 [00:01<00:01, 2405.17ex/s] +filtering NST #24: 
46%|█████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 3675/7929 [00:01<00:01, 2397.96ex/s] +filtering NST #25: 47%|███████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 3718/7929 [00:01<00:01, 2352.52ex/s] +filtering NST #26: 47%|██████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 3703/7929 [00:01<00:01, 2381.59ex/s] +filtering NST #27: 47%|██████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 3703/7929 [00:01<00:01, 2389.93ex/s] +filtering NST #28: 45%|██████████████████████████████████████████████████████████████████████████████████████████████████▋ | 3558/7929 [00:01<00:01, 2295.16ex/s] +filtering NST #29: 25%|███████████████████████████████████████████████████████▏ | 1988/7929 [00:01<00:04, 1344.06ex/s] +filtering NST #30: 36%|████████████████████████████████████████████████████████████████████████████████▏ | 2890/7929 [00:01<00:01, 2543.20ex/s] +filtering NST #15: 62%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 4885/7929 [00:03<00:01, 1922.01ex/s] +filtering NST #21: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 7862/7929 [00:03<00:00, 2378.04ex/s] +filtering NST #18: 77%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 6078/7929 [00:03<00:00, 2157.66ex/s] +filtering NST #31: 
96%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 7608/7929 [00:03<00:00, 2428.73ex/s] +filtering NST #18: 80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 6362/7929 [00:03<00:00, 2334.01ex/s] +filtering NST #29: 73%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 5797/7929 [00:03<00:00, 2381.69ex/s] +filtering NST #28: 97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 7718/7929 [00:03<00:00, 2238.73ex/s] +filtering NST #31: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7881/7929 [00:03<00:00, 2510.70ex/s] +filtering NST #29: 76%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 6045/7929 [00:03<00:00, 2019.87ex/s] +filtering NST #30: 94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 7473/7929 [00:03<00:00, 2209.27ex/s] +filtering NST #15: 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7929/7929 [00:04<00:00, 1597.96ex/s] +filtering NST #15: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7929/7929 [00:04<00:00, 1597.96ex/s] +filtering NST #1: 35%|████████████████████████████████████████████████████████████████████████████▊ | 817/2363 [00:00<00:00, 1885.26ex/s] +filtering NST #16: 73%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 1725/2363 [00:01<00:00, 1527.92ex/s] + @ 0x7f0d9fd50294 976 (unknown)fef75ef99,7f0fef56d0bf&map= ██████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 1725/2363 [00:01<00:00, 1527.92ex/s] +filtering NST #1: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1057/1057 [00:00<00:00, 2022.10ex/s] +E0729 21:50:39.818813 299646 process_state.cc:774] RAW: Raising signal 15 with default behaviorTERM.███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1057/1057 [00:00<00:00, 2022.10ex/s] +E0729 21:50:39.818920 299617 process_state.cc:774] RAW: Raising signal 15 with default behavior +E0729 21:50:39.819015 299649 process_state.cc:774] RAW: Raising signal 15 with default behavior +E0729 21:50:39.819175 299619 process_state.cc:774] RAW: Raising signal 15 with default behavior +E0729 
21:50:39.819907 299656 process_state.cc:774] RAW: Raising signal 15 with default behavior +E0729 21:50:39.820575 299623 process_state.cc:774] RAW: Raising signal 15 with default behavior +E0729 21:50:39.820645 299611 process_state.cc:774] RAW: Raising signal 15 with default behavior +E0729 21:50:39.826479 299620 process_state.cc:774] RAW: Raising signal 15 with default behavior +E0729 21:50:39.831846 299653 process_state.cc:774] RAW: Raising signal 15 with default behavior +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +filtering NPSC #17: 82%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 1302/1595 [00:00<00:00, 2518.19ex/s] +filtering NPSC #20: 81%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 1287/1595 [00:00<00:00, 3113.00ex/s] +filtering NPSC #21: 81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 1299/1595 [00:00<00:00, 3124.59ex/s] +filtering NPSC #23: 
53%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 842/1595 [00:00<00:00, 4236.94ex/s] +filtering NPSC #22: 80%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 1280/1595 [00:00<00:00, 3046.53ex/s] +filtering NPSC #23: 79%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 1266/1595 [00:00<00:00, 2867.88ex/s] +filtering NPSC #24: 80%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 1276/1595 [00:00<00:00, 2888.52ex/s] +filtering NPSC #27: 44%|████████████████████████████████████████████████████████████████████████████████████████████████▏ | 697/1594 [00:00<00:00, 2239.06ex/s] +filtering NPSC #28: 52%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 821/1594 [00:00<00:00, 4126.90ex/s] +filtering NPSC #29: 53%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 848/1594 [00:00<00:00, 4262.97ex/s] +filtering NPSC #30: 30%|██████████████████████████████████████████████████████████████████▌ | 482/1594 [00:00<00:00, 2335.03ex/s] +filtering NPSC #30: 46%|████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 727/1594 [00:00<00:00, 2384.53ex/s] +filtering NPSC #31: 45%|██████████████████████████████████████████████████████████████████████████████████████████████████▍ | 713/1594 [00:00<00:00, 2377.36ex/s] +filtering NPSC #9: 
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 2863.23ex/s] +filtering NPSC #10: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 2942.58ex/s] +filtering NPSC #11: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 2948.20ex/s] +filtering NPSC #12: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 3140.75ex/s] +filtering NPSC #13: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 3122.36ex/s] +filtering NPSC #17: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 3777.13ex/s] +filtering NPSC #14: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 2914.54ex/s] +filtering NPSC #18: 
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 3956.16ex/s] +filtering NPSC #16: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 3201.51ex/s] +filtering NPSC #19: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 4077.04ex/s] +filtering NPSC #20: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 4035.87ex/s] +filtering NPSC #15: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 2494.70ex/s] +filtering NPSC #21: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 4031.17ex/s] +filtering NPSC #23: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 213/213 [00:00<00:00, 3869.40ex/s] +filtering NPSC #24: 
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 213/213 [00:00<00:00, 3657.49ex/s] +filtering NPSC #25: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 213/213 [00:00<00:00, 3978.18ex/s] +filtering NPSC #26: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 213/213 [00:00<00:00, 4042.04ex/s] +filtering NPSC #22: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 214/214 [00:00<00:00, 2868.35ex/s] +filtering NPSC #27: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 213/213 [00:00<00:00, 3951.27ex/s] +filtering NPSC #28: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 213/213 [00:00<00:00, 4010.50ex/s] +filtering NPSC #29: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 213/213 [00:00<00:00, 4083.08ex/s] +filtering NPSC #30: 
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 213/213 [00:00<00:00, 4069.21ex/s] +filtering NPSC #31: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 213/213 [00:00<00:00, 4154.30ex/s] +filtering NPSC #21: 0%| | 0/214 [00:00 to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_q', 'bias'), ('project_hid', 'kernel'), ('project_q', 'kernel'), ('quantizer', 'weight_proj', 'bias'), ('quantizer', 'weight_proj', 'kernel'), ('quantizer', 'codevectors'), ('project_hid', 'bias')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
+Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'kernel'), ('lm_head', 'bias')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 38%|██████████████████████████████████████████████████████████████████████████▋ | 3612/9523 [00:00<00:00, 9005.42ex/s] +removing punctuation from train split #1: 38%|█████████████████████████████████████████████████████████████████████████▉ | 3576/9523 [00:00<00:00, 8729.88ex/s] +removing punctuation from train split #2: 38%|██████████████████████████████████████████████████████████████████████████▋ | 3610/9523 [00:00<00:00, 8635.06ex/s] +removing punctuation from train split #3: 27%|█████████████████████████████████████████████████████▏ | 2573/9523 [00:00<00:00, 8169.76ex/s] +removing punctuation from train split #4: 28%|██████████████████████████████████████████████████████▋ | 2645/9523 [00:00<00:00, 8221.26ex/s] +removing punctuation from train split #5: 18%|███████████████████████████████████▏ | 1702/9523 [00:00<00:00, 8558.55ex/s] +removing punctuation from train split #6: 18%|██████████████████████████████████▊ | 1681/9523 [00:00<00:00, 8425.32ex/s] +removing punctuation from train split #7: 8%|████████████████▊ | 808/9523 [00:00<00:01, 8072.29ex/s] +removing punctuation from train split #8: 8%|████████████████▋ | 805/9523 [00:00<00:01, 8040.58ex/s] +removing punctuation from train split #9: 0%| | 0/9523 [00:00 + main() | 0/3 [00:00=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + 
+@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. + """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+    Below, `B` denotes the batch size, `T` denotes the time steps in `logits`,
+    and `N` denotes the time steps in `labels`.
+    Args:
+      logits: (B, T, K)-array containing log-probabilities of each class.
+      logits_attention_mask: (B, T)-array. Attention mask for `logits`; the
+        padding indicators for `logits` are the logical inverse of this mask.
+      labels: (B, N)-array containing reference integer labels, with padding
+        positions indicated by the value -100. `labels` must be right-padded,
+        i.e. each row consists of valid labels followed by a run of -100
+        entries.
+      blank_id: Id for blank token.
+      loss_reduction: one of "mean", "sum", "none"
+        - "none": no reduction is applied.
+        - "mean": output loss will be divided by target lengths and then the
+          mean over the batch is taken.
+        - "sum": output losses are summed over the batch
+      output_emission_dict: whether to output additional information about the emission probs
+    Returns:
+      A pair of `(per_seq_loss, aux)`.
+      per_seq_loss:
+        (B,)-array containing loss values for each sequence in the batch.
+      aux: Dictionary containing interim variables used for computing losses.
+        aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each
+          phi-state corresponding to the n-th label.
+        aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each
+          emission-state corresponding to the n-th label.
+        aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol
+          corresponding to each time frame.
+        aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label
+          corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training
+    rng = jax.random.PRNGKey(training_args.seed)
+    rng, dropout_rng = jax.random.split(rng)
+
+    # Store some constants
+    max_steps = int(training_args.max_steps)
+    gradient_accumulation_steps = int(training_args.gradient_accumulation_steps)
+    train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count()
+    batch_size_per_update = train_batch_size * gradient_accumulation_steps
+    per_device_eval_batch_size = int(training_args.per_device_eval_batch_size)
+    eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count()
+    to_dtype = to_bf16 if training_args.mixed_precision else to_fp32
+
+    if training_args.do_train:
+        num_train_samples = len(vectorized_datasets["train"])
+        steps_per_epoch = num_train_samples // batch_size_per_update
+        if max_steps > 0:
+            num_epochs = -(training_args.max_steps // -steps_per_epoch)
+            total_train_steps = max_steps
+        else:
+            num_epochs = int(training_args.num_train_epochs)
+            total_train_steps = steps_per_epoch * num_epochs
+
+        # Create learning rate schedule
+        linear_decay_lr_schedule_fn = create_learning_rate_fn(
+            total_train_steps,
+            training_args.warmup_steps,
+            training_args.learning_rate,
+        )
+
+        # We use Optax's "masking" functionality to not apply weight decay
+        # to bias and LayerNorm scale parameters. decay_mask_fn returns a
+        # mask boolean with the same structure as the parameters.
+        # The mask is True for parameters that should be decayed.
+        # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart.
+        # For FlaxT5, one should correct the layer norm parameter naming
+        # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_224434-36odnm43/files/config.yaml b/wandb/run-20220729_224434-36odnm43/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a5e0a7760e8ef146c889f2d8c223a91c6a63835e --- /dev/null +++ b/wandb/run-20220729_224434-36odnm43/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659134674 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_224434-36odnm43/files/output.log b/wandb/run-20220729_224434-36odnm43/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..7cf6214d8c08a26fe762fe685448259df3a3af47 --- /dev/null +++ b/wandb/run-20220729_224434-36odnm43/files/output.log @@ -0,0 +1,903 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_22-44-30_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=128, +per_device_train_batch_size=128, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, 
+save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 79.13it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 443.72it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + "contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + 
"initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at /home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/vocab.json from cache at /home/javierr/.cache/huggingface/transformers/dbb47708b30c45561aee53d4cc808c3ed6c4785df1e2b757c8852837f6f58537.cd4a163fb068cd596a09687d4b1d7e345279d1497fdf547b3c986f4d6ee855b8 +loading file https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/tokenizer_config.json from cache at 
/home/javierr/.cache/huggingface/transformers/32ac0765a83a8c538f4d3d8f104f69eab9648cb4d3e3a6bb6463185512f204d4.ff272c14a3ca512c9d9f85a898e6a086e8f790b06ff540f484000dab53132349 +loading file https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/added_tokens.json from cache at /home/javierr/.cache/huggingface/transformers/a86e893e88bb40472d24ffc4e2be032578bb14755e88bb1e961bad6276161a17.23a5f4e269f91a88f81d8cb3c6f881c8eb30ad98b9c9691569d4cfb87512d722 +loading file https://huggingface.co/NbAiLab/nb-wav2vec2-1b-bokmaal/resolve/main/special_tokens_map.json from cache at /home/javierr/.cache/huggingface/transformers/c0891e9bf7d08f1e6d4c0d7da3104155480b7f65b84d2e1ef0506f017fd60443.2fce57d7d6ae62285f9966753bbdba194f7ff904d5f380e3bb6a6242819f9a93 +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('quantizer', 'weight_proj', 'bias'), ('project_hid', 'kernel'), ('project_q', 'bias'), ('quantizer', 'codevectors'), ('project_hid', 'bias'), ('quantizer', 'weight_proj', 'kernel'), ('project_q', 'kernel')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). 
+- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). +Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'kernel'), ('lm_head', 'bias')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 49%|████████████████████████████████████████████████████████████████████████████████████████████████▍ | 4664/9523 [00:00<00:00, 9466.64ex/s] +removing punctuation from train split #1: 38%|██████████████████████████████████████████████████████████████████████████▎ | 3594/9523 [00:00<00:00, 9256.81ex/s] +removing punctuation from train split #2: 38%|███████████████████████████████████████████████████████████████████████████▏ | 3633/9523 [00:00<00:00, 9358.53ex/s] +removing punctuation from train split #3: 28%|██████████████████████████████████████████████████████▌ | 2635/9523 [00:00<00:00, 8984.44ex/s] +removing punctuation from train split #4: 29%|██████████████████████████████████████████████████████████ | 2806/9523 [00:00<00:00, 9466.14ex/s] +removing punctuation from train split #5: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8682.42ex/s] +removing punctuation from train split #6: 
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8768.42ex/s] +removing punctuation from train split #8: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8710.81ex/s] +removing punctuation from train split #7: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8103.19ex/s] +removing punctuation from train split #9: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8712.19ex/s] +removing punctuation from train split #10: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8483.91ex/s] +removing punctuation from train split #11: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8506.00ex/s] +removing punctuation from train split #12: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8424.56ex/s] +removing punctuation from train split #13: 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8425.47ex/s] +removing punctuation from train split #14: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8593.40ex/s] +removing punctuation from train split #15: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8417.50ex/s] +removing punctuation from train split #16: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8261.69ex/s] +removing punctuation from train split #17: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8576.67ex/s] +removing punctuation from train split #18: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8627.76ex/s] +removing punctuation from train split #19: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8564.63ex/s] +removing punctuation from train split #20: 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8116.44ex/s] +removing punctuation from train split #21: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8357.09ex/s] +removing punctuation from train split #22: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8664.23ex/s] +removing punctuation from train split #23: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8866.41ex/s] +removing punctuation from train split #24: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8613.35ex/s] +removing punctuation from train split #25: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8916.53ex/s] +removing punctuation from train split #13: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 9462/9522 [00:01<00:00, 8535.89ex/s] +removing punctuation from train split #17: 
62%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 5951/9522 [00:00<00:00, 8429.16ex/s] +removing punctuation from train split #15: 89%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8448/9522 [00:01<00:00, 8529.83ex/s] +removing punctuation from train split #17: 72%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 6839/9522 [00:00<00:00, 8569.69ex/s] +removing punctuation from train split #15: 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9313/9522 [00:01<00:00, 8565.31ex/s] +removing punctuation from train split #16: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8354/9522 [00:01<00:00, 8458.16ex/s] +removing punctuation from train split #16: 97%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 9221/9522 [00:01<00:00, 8520.83ex/s] +removing punctuation from train split #18: 82%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 7795/9522 [00:00<00:00, 8700.55ex/s] +removing punctuation from train split #17: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊| 9515/9522 [00:01<00:00, 
8788.97ex/s] +removing punctuation from train split #25: 8%|████████████████▎ | 790/9522 [00:00<00:01, 7895.45ex/s] +removing punctuation from train split #19: 81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7707/9522 [00:00<00:00, 8685.02ex/s] +removing punctuation from train split #27: 0%| | 0/9522 [00:00 to the vocabulary +Adding to the vocabulary +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1605, in + main() + File "run_flax_speech_recognition_ctc.py", line 1209, in main + processor = AutoProcessor.from_pretrained(training_args.output_dir) + File "/data/flax/lib/python3.8/site-packages/transformers/models/auto/processing_auto.py", line 243, in from_pretrained + return processor_class.from_pretrained( + File "/data/flax/lib/python3.8/site-packages/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py", line 144, in from_pretrained + decoder = BeamSearchDecoderCTC.load_from_dir(pretrained_model_name_or_path) + File "/data/flax/lib/python3.8/site-packages/pyctcdecode/decoder.py", line 790, in load_from_dir + filenames = cls.parse_directory_contents(filepath) + File "/data/flax/lib/python3.8/site-packages/pyctcdecode/decoder.py", line 765, in parse_directory_contents + raise ValueError( +ValueError: Could not find alphabet file alphabet.json. 
Found ['preprocessor_config.json', 'wandb', 'vocab.json', 'config.json', 'run.sh', 'special_tokens_map.json', 'README.md', 'models', 'added_tokens.json', 'tokenizer_config.json', 'run_flax_speech_recognition_ctc.py'] \ No newline at end of file diff --git a/wandb/run-20220729_224434-36odnm43/files/requirements.txt b/wandb/run-20220729_224434-36odnm43/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ef78cbdea431c3d66bc5af51443394cb7955eab --- /dev/null +++ b/wandb/run-20220729_224434-36odnm43/files/requirements.txt @@ -0,0 +1,158 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +exceptiongroup==1.0.0rc8 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +hypothesis==6.53.0 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +jiwer==2.3.0 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 
+multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.4.0 +pygments==2.11.1 +pygtrie==2.5.0 +pyparsing==3.0.6 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220729_224434-36odnm43/files/wandb-metadata.json b/wandb/run-20220729_224434-36odnm43/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..53da0a35a953e76358da65e762332594a0ad2ed0 --- /dev/null +++ b/wandb/run-20220729_224434-36odnm43/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": 
"Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-29T22:44:37.985063", + "startedAt": "2022-07-29T22:44:34.625516", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=NbAiLab/nb-wav2vec2-1b-bokmaal", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=128", + "--per_device_eval_batch_size=128", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git 
a/wandb/run-20220729_224434-36odnm43/files/wandb-summary.json b/wandb/run-20220729_224434-36odnm43/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..3ed4accf330a32a60ea895077e92f1e740703b4b --- /dev/null +++ b/wandb/run-20220729_224434-36odnm43/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 98}} \ No newline at end of file diff --git a/wandb/run-20220729_224434-36odnm43/logs/debug-internal.log b/wandb/run-20220729_224434-36odnm43/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..129d6316553b7783e9beddfbfe4df131c6948157 --- /dev/null +++ b/wandb/run-20220729_224434-36odnm43/logs/debug-internal.log @@ -0,0 +1,191 @@ +2022-07-29 22:44:35,544 INFO MainThread:3538816 [internal.py:wandb_internal():87] W&B internal server running at pid: 3538816, started at: 2022-07-29 22:44:35.543810 +2022-07-29 22:44:35,546 INFO WriterThread:3538816 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/run-36odnm43.wandb +2022-07-29 22:44:35,546 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: check_version +2022-07-29 22:44:35,546 DEBUG SenderThread:3538816 [sender.py:send():234] send: header +2022-07-29 22:44:35,547 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: check_version +2022-07-29 22:44:35,585 DEBUG SenderThread:3538816 [sender.py:send():234] send: run +2022-07-29 22:44:35,751 INFO SenderThread:3538816 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files +2022-07-29 22:44:35,751 INFO SenderThread:3538816 [sender.py:_start_run_threads():804] run started: 36odnm43 with start time 1659134674 +2022-07-29 22:44:35,752 DEBUG SenderThread:3538816 [sender.py:send():234] send: summary +2022-07-29 22:44:35,752 INFO SenderThread:3538816 [sender.py:_save_file():939] saving file wandb-summary.json with policy end 
+2022-07-29 22:44:35,753 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: run_start +2022-07-29 22:44:36,753 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/wandb-summary.json +2022-07-29 22:44:37,984 DEBUG HandlerThread:3538816 [meta.py:__init__():40] meta init +2022-07-29 22:44:37,984 DEBUG HandlerThread:3538816 [meta.py:__init__():54] meta init done +2022-07-29 22:44:37,985 DEBUG HandlerThread:3538816 [meta.py:probe():214] probe +2022-07-29 22:44:37,986 DEBUG HandlerThread:3538816 [meta.py:_setup_git():204] setup git +2022-07-29 22:44:38,023 DEBUG HandlerThread:3538816 [meta.py:_setup_git():211] setup git done +2022-07-29 22:44:38,023 DEBUG HandlerThread:3538816 [meta.py:_save_code():92] save code +2022-07-29 22:44:38,036 DEBUG HandlerThread:3538816 [meta.py:_save_code():113] save code done +2022-07-29 22:44:38,036 DEBUG HandlerThread:3538816 [meta.py:_save_patches():130] save patches +2022-07-29 22:44:38,096 DEBUG HandlerThread:3538816 [meta.py:_save_patches():172] save patches done +2022-07-29 22:44:38,096 DEBUG HandlerThread:3538816 [meta.py:_save_pip():58] save pip +2022-07-29 22:44:38,097 DEBUG HandlerThread:3538816 [meta.py:_save_pip():72] save pip done +2022-07-29 22:44:38,097 DEBUG HandlerThread:3538816 [meta.py:probe():252] probe done +2022-07-29 22:44:38,100 DEBUG SenderThread:3538816 [sender.py:send():234] send: files +2022-07-29 22:44:38,100 INFO SenderThread:3538816 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-29 22:44:38,101 INFO SenderThread:3538816 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-29 22:44:38,107 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:44:38,108 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: stop_status +2022-07-29 
22:44:38,589 INFO Thread-11 :3538816 [upload_job.py:push():137] Uploaded file /tmp/tmprqhm2s9uwandb/3nhzs38m-wandb-metadata.json +2022-07-29 22:44:38,757 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/wandb-metadata.json +2022-07-29 22:44:38,757 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:44:38,757 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/code/run_flax_speech_recognition_ctc.py +2022-07-29 22:44:38,757 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/requirements.txt +2022-07-29 22:44:38,757 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/code +2022-07-29 22:44:38,906 INFO Thread-12 :3538816 [upload_job.py:push():137] Uploaded file /tmp/tmprqhm2s9uwandb/11rqutlz-code/run_flax_speech_recognition_ctc.py +2022-07-29 22:44:40,758 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:44:42,759 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:44:44,759 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:44:46,761 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:44:52,763 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:44:53,261 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:44:53,262 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:44:54,764 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:44:56,765 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:06,056 DEBUG SenderThread:3538816 [sender.py:send():234] send: stats +2022-07-29 22:45:08,396 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:45:08,397 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:45:08,771 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:10,772 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:19,777 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:21,778 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:23,534 DEBUG HandlerThread:3538816 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:45:23,534 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:45:23,779 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:35,785 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:36,128 DEBUG SenderThread:3538816 [sender.py:send():234] send: stats +2022-07-29 22:45:37,787 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:38,817 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:45:38,818 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:45:47,792 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:49,793 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:51,794 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:53,796 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:53,986 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:45:53,986 DEBUG SenderThread:3538816 [sender.py:send_request():248] 
send_request: stop_status +2022-07-29 22:45:55,797 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:57,798 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:45:59,799 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:46:01,800 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:46:03,801 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:46:05,802 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:46:06,195 DEBUG SenderThread:3538816 [sender.py:send():234] send: stats +2022-07-29 22:46:07,803 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:46:09,156 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:46:09,156 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:46:09,804 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:46:11,805 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:46:13,806 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:46:14,660 DEBUG SenderThread:3538816 [sender.py:send():234] send: telemetry +2022-07-29 22:46:14,660 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 22:46:14,660 DEBUG SenderThread:3538816 [sender.py:send():234] send: exit +2022-07-29 22:46:14,661 INFO SenderThread:3538816 [sender.py:send_exit():366] handling exit code: 1 +2022-07-29 22:46:14,661 INFO SenderThread:3538816 [sender.py:send_exit():368] handling runtime: 98 +2022-07-29 22:46:14,661 INFO SenderThread:3538816 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 22:46:14,661 INFO SenderThread:3538816 [sender.py:send_exit():374] send defer +2022-07-29 22:46:14,662 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:14,662 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: defer +2022-07-29 22:46:14,662 INFO HandlerThread:3538816 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-29 22:46:14,663 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: defer +2022-07-29 22:46:14,663 INFO SenderThread:3538816 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-29 22:46:14,663 INFO SenderThread:3538816 [sender.py:transition_state():387] send defer: 1 +2022-07-29 22:46:14,663 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: defer +2022-07-29 22:46:14,663 INFO HandlerThread:3538816 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-29 22:46:14,715 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: defer +2022-07-29 22:46:14,715 INFO SenderThread:3538816 
[sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-29 22:46:14,715 INFO SenderThread:3538816 [sender.py:transition_state():387] send defer: 2 +2022-07-29 22:46:14,715 DEBUG SenderThread:3538816 [sender.py:send():234] send: stats +2022-07-29 22:46:14,716 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: defer +2022-07-29 22:46:14,716 INFO HandlerThread:3538816 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-29 22:46:14,716 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: defer +2022-07-29 22:46:14,716 INFO SenderThread:3538816 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-29 22:46:14,716 INFO SenderThread:3538816 [sender.py:transition_state():387] send defer: 3 +2022-07-29 22:46:14,717 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: defer +2022-07-29 22:46:14,717 INFO HandlerThread:3538816 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-29 22:46:14,717 DEBUG SenderThread:3538816 [sender.py:send():234] send: summary +2022-07-29 22:46:14,717 INFO SenderThread:3538816 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 22:46:14,717 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: defer +2022-07-29 22:46:14,717 INFO SenderThread:3538816 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-29 22:46:14,717 INFO SenderThread:3538816 [sender.py:transition_state():387] send defer: 4 +2022-07-29 22:46:14,718 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: defer +2022-07-29 22:46:14,718 INFO HandlerThread:3538816 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-29 22:46:14,718 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: defer +2022-07-29 22:46:14,718 INFO SenderThread:3538816 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-29 22:46:14,764 DEBUG 
HandlerThread:3538816 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 22:46:14,807 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:46:14,807 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/wandb-summary.json +2022-07-29 22:46:14,918 INFO SenderThread:3538816 [sender.py:transition_state():387] send defer: 5 +2022-07-29 22:46:14,919 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:14,919 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: defer +2022-07-29 22:46:14,919 INFO HandlerThread:3538816 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-29 22:46:14,919 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: defer +2022-07-29 22:46:14,919 INFO SenderThread:3538816 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-29 22:46:14,919 INFO SenderThread:3538816 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-29 22:46:15,020 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 22:46:15,807 INFO Thread-8 :3538816 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/config.yaml +2022-07-29 22:46:15,808 INFO SenderThread:3538816 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files +2022-07-29 22:46:15,808 INFO SenderThread:3538816 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/config.yaml config.yaml +2022-07-29 22:46:15,808 INFO SenderThread:3538816 [dir_watcher.py:finish():327] scan save: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/requirements.txt requirements.txt +2022-07-29 22:46:15,809 INFO SenderThread:3538816 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log output.log +2022-07-29 22:46:15,809 INFO SenderThread:3538816 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/wandb-summary.json wandb-summary.json +2022-07-29 22:46:15,809 INFO SenderThread:3538816 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/wandb-metadata.json wandb-metadata.json +2022-07-29 22:46:15,809 INFO SenderThread:3538816 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-29 22:46:15,815 INFO SenderThread:3538816 [sender.py:transition_state():387] send defer: 6 +2022-07-29 22:46:15,815 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:15,819 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: defer +2022-07-29 22:46:15,819 INFO HandlerThread:3538816 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-29 22:46:15,819 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: defer +2022-07-29 22:46:15,819 INFO SenderThread:3538816 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-29 22:46:15,819 INFO SenderThread:3538816 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 22:46:15,921 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 22:46:15,921 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:16,022 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: poll_exit 
+2022-07-29 22:46:16,023 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:16,124 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 22:46:16,125 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:16,226 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 22:46:16,226 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:16,281 INFO Thread-14 :3538816 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/requirements.txt +2022-07-29 22:46:16,285 INFO Thread-13 :3538816 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/config.yaml +2022-07-29 22:46:16,304 INFO Thread-16 :3538816 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/wandb-summary.json +2022-07-29 22:46:16,328 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 22:46:16,328 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:16,430 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 22:46:16,430 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:16,484 INFO Thread-15 :3538816 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/files/output.log +2022-07-29 22:46:16,531 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 22:46:16,531 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:16,633 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] 
handle_request: poll_exit +2022-07-29 22:46:16,633 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:16,685 INFO Thread-7 :3538816 [sender.py:transition_state():387] send defer: 7 +2022-07-29 22:46:16,686 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: defer +2022-07-29 22:46:16,687 INFO HandlerThread:3538816 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-29 22:46:16,687 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: defer +2022-07-29 22:46:16,687 INFO SenderThread:3538816 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-29 22:46:16,734 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 22:46:17,278 INFO SenderThread:3538816 [sender.py:transition_state():387] send defer: 8 +2022-07-29 22:46:17,278 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:17,279 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: defer +2022-07-29 22:46:17,279 INFO HandlerThread:3538816 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-29 22:46:17,280 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: defer +2022-07-29 22:46:17,280 INFO SenderThread:3538816 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-29 22:46:17,280 INFO SenderThread:3538816 [sender.py:transition_state():387] send defer: 9 +2022-07-29 22:46:17,280 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: defer +2022-07-29 22:46:17,280 INFO HandlerThread:3538816 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-29 22:46:17,280 DEBUG SenderThread:3538816 [sender.py:send():234] send: final +2022-07-29 22:46:17,280 DEBUG SenderThread:3538816 [sender.py:send():234] send: footer +2022-07-29 22:46:17,280 DEBUG SenderThread:3538816 [sender.py:send_request():248] 
send_request: defer +2022-07-29 22:46:17,281 INFO SenderThread:3538816 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-29 22:46:17,380 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 22:46:17,380 DEBUG SenderThread:3538816 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 22:46:17,381 INFO SenderThread:3538816 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 22:46:17,641 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: get_summary +2022-07-29 22:46:17,642 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-29 22:46:17,642 DEBUG HandlerThread:3538816 [handler.py:handle_request():130] handle_request: shutdown +2022-07-29 22:46:17,642 INFO HandlerThread:3538816 [handler.py:finish():731] shutting down handler +2022-07-29 22:46:18,281 INFO WriterThread:3538816 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/run-36odnm43.wandb +2022-07-29 22:46:18,640 INFO SenderThread:3538816 [sender.py:finish():1070] shutting down sender +2022-07-29 22:46:18,640 INFO SenderThread:3538816 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 22:46:18,640 INFO SenderThread:3538816 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 22:46:18,643 INFO MainThread:3538816 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220729_224434-36odnm43/logs/debug.log b/wandb/run-20220729_224434-36odnm43/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..7ea0d2be71419b182619e7eaa3b26e2593556cfa --- /dev/null +++ b/wandb/run-20220729_224434-36odnm43/logs/debug.log @@ -0,0 +1,148 @@ +2022-07-29 22:44:34,627 INFO MainThread:3537462 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-29 22:44:34,627 INFO MainThread:3537462 
[wandb_setup.py:_flush():71] setting login settings: {} +2022-07-29 22:44:34,627 INFO MainThread:3537462 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/logs/debug.log +2022-07-29 22:44:34,627 INFO MainThread:3537462 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_224434-36odnm43/logs/debug-internal.log +2022-07-29 22:44:34,627 INFO MainThread:3537462 [wandb_init.py:init():404] calling init triggers +2022-07-29 22:44:34,627 INFO MainThread:3537462 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-29 22:44:34,627 INFO MainThread:3537462 [wandb_init.py:init():460] starting backend +2022-07-29 22:44:34,627 INFO MainThread:3537462 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-29 22:44:34,692 INFO MainThread:3537462 [backend.py:ensure_launched():216] starting backend process... +2022-07-29 22:44:34,737 INFO MainThread:3537462 [backend.py:ensure_launched():221] started backend process with pid: 3538816 +2022-07-29 22:44:34,739 INFO MainThread:3537462 [wandb_init.py:init():469] backend started and connected +2022-07-29 22:44:34,754 INFO MainThread:3537462 [wandb_init.py:init():533] updated telemetry +2022-07-29 22:44:34,864 INFO MainThread:3537462 [wandb_init.py:init():563] communicating current version +2022-07-29 22:44:35,583 INFO MainThread:3537462 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! 
To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-29 22:44:35,584 INFO MainThread:3537462 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-29 22:44:35,753 INFO MainThread:3537462 [wandb_init.py:init():606] starting run threads in backend +2022-07-29 22:44:38,104 INFO MainThread:3537462 [wandb_run.py:_console_start():1810] atexit reg +2022-07-29 22:44:38,104 INFO MainThread:3537462 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-29 22:44:38,105 INFO MainThread:3537462 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-29 22:44:38,107 INFO MainThread:3537462 [wandb_run.py:_redirect():1745] Redirects installed. +2022-07-29 22:44:38,107 INFO MainThread:3537462 [wandb_init.py:init():633] run started, returning control to user process +2022-07-29 22:46:12,243 INFO MainThread:3537462 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-29 22:46:12,247 INFO MainThread:3537462 [wandb_run.py:_restore():1752] restore +2022-07-29 22:46:14,662 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73946 + total_bytes: 73946 +} + +2022-07-29 22:46:14,919 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73946 + total_bytes: 73946 +} + +2022-07-29 22:46:15,820 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73946 + total_bytes: 261315 +} + +2022-07-29 22:46:15,922 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73946 + total_bytes: 261315 +} + +2022-07-29 22:46:16,023 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got 
exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 261315 + total_bytes: 261315 +} + +2022-07-29 22:46:16,125 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 261315 + total_bytes: 261315 +} + +2022-07-29 22:46:16,227 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 261315 + total_bytes: 261315 +} + +2022-07-29 22:46:16,329 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 261315 + total_bytes: 261315 +} + +2022-07-29 22:46:16,430 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 261315 + total_bytes: 261315 +} + +2022-07-29 22:46:16,532 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 261315 + total_bytes: 261315 +} + +2022-07-29 22:46:16,634 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 261315 + total_bytes: 261315 +} + +2022-07-29 22:46:17,279 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 261315 + total_bytes: 261315 +} + +2022-07-29 22:46:17,640 INFO MainThread:3537462 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 261315 + total_bytes: 261315 +} +local_info { +} + +2022-07-29 22:46:19,154 INFO MainThread:3537462 
[wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220729_224434-36odnm43/run-36odnm43.wandb b/wandb/run-20220729_224434-36odnm43/run-36odnm43.wandb new file mode 100644 index 0000000000000000000000000000000000000000..c091db00292617e4879cb0f58fef68db83c1776c --- /dev/null +++ b/wandb/run-20220729_224434-36odnm43/run-36odnm43.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5097f7267ca776d7a5ef9b9fc09a67bd4d22fd687ab2721c70b19c6e2d2c165 +size 187832 diff --git a/wandb/run-20220729_225502-398l7dkj/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220729_225502-398l7dkj/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..d846a57f2375127bc169f9735151ad564083efac --- /dev/null +++ b/wandb/run-20220729_225502-398l7dkj/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1605 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logits_attention_mask: (B, T)-array. Attention mask for `logits`; the + padding indicators are derived internally as its inverse. + labels: (B, N)-array containing reference integer labels. Padding + positions in `labels` are indicated by the value -100. Currently, + `labels` must be right-padded, i.e. each row must consist of the + real labels followed by the padding values. + blank_id: Id for blank token. + loss_reduction: one of "none", "mean", "sum" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output losses are summed over the batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + The reduced loss, or a pair of `(loss, aux)` when `output_emission_dict` is `True`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_225502-398l7dkj/files/config.yaml b/wandb/run-20220729_225502-398l7dkj/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ac95417d545afa2ab00e2c8ab3be06297e65197 --- /dev/null +++ b/wandb/run-20220729_225502-398l7dkj/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659135302 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_225502-398l7dkj/files/output.log b/wandb/run-20220729_225502-398l7dkj/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..41bd58fdd7196843dddd2ef6e441e979aa032bc0 --- /dev/null +++ b/wandb/run-20220729_225502-398l7dkj/files/output.log @@ -0,0 +1,1111 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_22-54-58_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=128, +per_device_train_batch_size=128, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, 
+save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 79.14it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 438.46it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + "contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + 
"initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at /home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at 
/home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_hid', 'kernel'), ('project_q', 'kernel'), ('quantizer', 'weight_proj', 'bias'), ('quantizer', 'weight_proj', 'kernel'), ('project_q', 'bias'), ('project_hid', 'bias'), ('quantizer', 'codevectors')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). +Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'bias'), ('lm_head', 'kernel')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. 
+ param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8908.66ex/s] +removing punctuation from train split #2: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8922.00ex/s] +removing punctuation from train split #3: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8639.32ex/s] +removing punctuation from train split #4: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8852.72ex/s] +removing punctuation from train split #1: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7630.58ex/s] +removing punctuation from train split #5: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8552.85ex/s] +removing punctuation from train split #6: 
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8690.10ex/s] +removing punctuation from train split #7: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8399.08ex/s] +removing punctuation from train split #6: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8169/9523 [00:00<00:00, 8175.26ex/s] +removing punctuation from train split #5: 94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8933/9523 [00:01<00:00, 8077.77ex/s] +removing punctuation from train split #9: 35%|█████████████████████████████████████████████████████████████████████▎ | 3351/9523 [00:00<00:00, 8461.01ex/s] +removing punctuation from train split #6: 95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9025/9523 [00:01<00:00, 8283.45ex/s] +removing punctuation from train split #7: 64%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 6060/9523 [00:00<00:00, 8680.27ex/s] +removing punctuation from train split #9: 44%|███████████████████████████████████████████████████████████████████████████████████████▏ | 4213/9523 [00:00<00:00, 8517.91ex/s] +removing punctuation from train split #7: 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏| 9484/9523 [00:01<00:00, 8220.08ex/s] +removing punctuation from train split #8: 75%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 7143/9523 [00:01<00:00, 6487.45ex/s] +removing punctuation from train split #9: 81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7672/9523 [00:00<00:00, 7900.51ex/s] +removing punctuation from train split #10: 82%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 7785/9523 [00:00<00:00, 8104.51ex/s] +removing punctuation from train split #10: 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8660/9523 [00:01<00:00, 8291.78ex/s] +removing punctuation from train split #12: 72%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 6839/9522 [00:00<00:00, 8631.85ex/s] +removing punctuation from train split #12: 81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7703/9522 [00:00<00:00, 8030.92ex/s] +removing punctuation from train split #14: 61%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 5823/9522 [00:00<00:00, 8420.75ex/s] +removing punctuation from train split #15: 
53%|███████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 5020/9522 [00:00<00:00, 8385.01ex/s] +removing punctuation from train split #15: 63%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 5977/9522 [00:00<00:00, 8760.16ex/s] +removing punctuation from train split #16: 62%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 5869/9522 [00:00<00:00, 8467.55ex/s] +removing punctuation from train split #18: 34%|██████████████████████████████████████████████████████████████████▌ | 3233/9522 [00:00<00:00, 8188.93ex/s] +removing punctuation from train split #18: 43%|███████████████████████████████████████████████████████████████████████████████████▋ | 4064/9522 [00:00<00:00, 8229.90ex/s] +removing punctuation from train split #20: 26%|██████████████████████████████████████████████████▎ | 2446/9522 [00:00<00:00, 8223.13ex/s] +removing punctuation from train split #20: 35%|███████████████████████████████████████████████████████████████████▋ | 3288/9522 [00:00<00:00, 8298.23ex/s] +removing punctuation from train split #22: 16%|██████████████████████████████▉ | 1500/9522 [00:00<00:01, 7600.24ex/s] +removing punctuation from train split #22: 25%|████████████████████████████████████████████████▏ | 2344/9522 [00:00<00:00, 7978.19ex/s] +removing punctuation from train split #24: 8%|███████████████▊ | 764/9522 [00:00<00:01, 7637.08ex/s] +removing punctuation from train split #25: 0%| | 0/9522 [00:00 to the vocabulary +Adding to the vocabulary +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1605, in + main() + File "run_flax_speech_recognition_ctc.py", line 1209, in main + processor = AutoProcessor.from_pretrained(training_args.output_dir) + File "/data/flax/lib/python3.8/site-packages/transformers/models/auto/processing_auto.py", line 243, in 
from_pretrained + return processor_class.from_pretrained( + File "/data/flax/lib/python3.8/site-packages/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py", line 144, in from_pretrained + decoder = BeamSearchDecoderCTC.load_from_dir(pretrained_model_name_or_path) + File "/data/flax/lib/python3.8/site-packages/pyctcdecode/decoder.py", line 790, in load_from_dir + filenames = cls.parse_directory_contents(filepath) + File "/data/flax/lib/python3.8/site-packages/pyctcdecode/decoder.py", line 765, in parse_directory_contents + raise ValueError( +ValueError: Could not find alphabet file alphabet.json. Found ['preprocessor_config.json', 'wandb', 'vocab.json', 'config.json', 'run.sh', 'special_tokens_map.json', 'README.md', 'models', 'added_tokens.json', 'tokenizer_config.json', 'run_flax_speech_recognition_ctc.py'] \ No newline at end of file diff --git a/wandb/run-20220729_225502-398l7dkj/files/requirements.txt b/wandb/run-20220729_225502-398l7dkj/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ef78cbdea431c3d66bc5af51443394cb7955eab --- /dev/null +++ b/wandb/run-20220729_225502-398l7dkj/files/requirements.txt @@ -0,0 +1,158 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +exceptiongroup==1.0.0rc8 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 
+google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +hypothesis==6.53.0 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +jiwer==2.3.0 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.4.0 +pygments==2.11.1 +pygtrie==2.5.0 +pyparsing==3.0.6 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 
+traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220729_225502-398l7dkj/files/wandb-metadata.json b/wandb/run-20220729_225502-398l7dkj/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..737fedb9c437b13786c2b34e9005b0856ab8efdf --- /dev/null +++ b/wandb/run-20220729_225502-398l7dkj/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-29T22:55:05.876646", + "startedAt": "2022-07-29T22:55:02.544361", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=./", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=128", + "--per_device_eval_batch_size=128", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + 
"--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220729_225502-398l7dkj/files/wandb-summary.json b/wandb/run-20220729_225502-398l7dkj/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..c1a4b169504c2c5ccd13d164b180d5f2c4a73be4 --- /dev/null +++ b/wandb/run-20220729_225502-398l7dkj/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 714}} \ No newline at end of file diff --git a/wandb/run-20220729_225502-398l7dkj/logs/debug-internal.log b/wandb/run-20220729_225502-398l7dkj/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..af956dbd1c5eca6079c17b04182416e65e9f57fa --- /dev/null +++ b/wandb/run-20220729_225502-398l7dkj/logs/debug-internal.log @@ -0,0 +1,508 @@ +2022-07-29 22:55:03,445 INFO MainThread:3550240 [internal.py:wandb_internal():87] W&B internal server running at pid: 3550240, started at: 2022-07-29 22:55:03.445579 +2022-07-29 22:55:03,447 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: check_version +2022-07-29 22:55:03,447 INFO WriterThread:3550240 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/run-398l7dkj.wandb +2022-07-29 22:55:03,448 DEBUG SenderThread:3550240 [sender.py:send():234] send: header +2022-07-29 22:55:03,448 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: check_version +2022-07-29 22:55:03,485 DEBUG SenderThread:3550240 [sender.py:send():234] 
send: run +2022-07-29 22:55:03,659 INFO SenderThread:3550240 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files +2022-07-29 22:55:03,659 INFO SenderThread:3550240 [sender.py:_start_run_threads():804] run started: 398l7dkj with start time 1659135302 +2022-07-29 22:55:03,660 DEBUG SenderThread:3550240 [sender.py:send():234] send: summary +2022-07-29 22:55:03,660 INFO SenderThread:3550240 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 22:55:03,661 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: run_start +2022-07-29 22:55:04,663 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/wandb-summary.json +2022-07-29 22:55:05,876 DEBUG HandlerThread:3550240 [meta.py:__init__():40] meta init +2022-07-29 22:55:05,876 DEBUG HandlerThread:3550240 [meta.py:__init__():54] meta init done +2022-07-29 22:55:05,876 DEBUG HandlerThread:3550240 [meta.py:probe():214] probe +2022-07-29 22:55:05,877 DEBUG HandlerThread:3550240 [meta.py:_setup_git():204] setup git +2022-07-29 22:55:05,915 DEBUG HandlerThread:3550240 [meta.py:_setup_git():211] setup git done +2022-07-29 22:55:05,915 DEBUG HandlerThread:3550240 [meta.py:_save_code():92] save code +2022-07-29 22:55:05,928 DEBUG HandlerThread:3550240 [meta.py:_save_code():113] save code done +2022-07-29 22:55:05,928 DEBUG HandlerThread:3550240 [meta.py:_save_patches():130] save patches +2022-07-29 22:55:05,987 DEBUG HandlerThread:3550240 [meta.py:_save_patches():172] save patches done +2022-07-29 22:55:05,987 DEBUG HandlerThread:3550240 [meta.py:_save_pip():58] save pip +2022-07-29 22:55:05,988 DEBUG HandlerThread:3550240 [meta.py:_save_pip():72] save pip done +2022-07-29 22:55:05,988 DEBUG HandlerThread:3550240 [meta.py:probe():252] probe done +2022-07-29 22:55:05,991 DEBUG SenderThread:3550240 
[sender.py:send():234] send: files +2022-07-29 22:55:05,991 INFO SenderThread:3550240 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-29 22:55:05,992 INFO SenderThread:3550240 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-29 22:55:05,999 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:55:05,999 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:55:06,492 INFO Thread-11 :3550240 [upload_job.py:push():137] Uploaded file /tmp/tmpdst8kfh8wandb/24kwmtby-wandb-metadata.json +2022-07-29 22:55:06,665 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/requirements.txt +2022-07-29 22:55:06,666 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/wandb-metadata.json +2022-07-29 22:55:06,666 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/code/run_flax_speech_recognition_ctc.py +2022-07-29 22:55:06,666 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:55:06,666 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/code +2022-07-29 22:55:07,596 INFO Thread-12 :3550240 [upload_job.py:push():137] Uploaded file /tmp/tmpdst8kfh8wandb/2ri72msf-code/run_flax_speech_recognition_ctc.py +2022-07-29 22:55:08,666 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 
22:55:10,667 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:55:12,669 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:55:14,670 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:55:20,673 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:55:21,138 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:55:21,139 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:55:22,674 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:55:33,961 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 22:55:34,680 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:55:36,296 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:55:36,296 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:55:36,681 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:55:46,686 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:55:48,687 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:55:51,457 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:55:51,457 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:56:03,693 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:56:04,030 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 22:56:06,780 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:56:06,781 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:56:21,938 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:56:21,939 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:56:34,096 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 22:56:37,225 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:56:37,226 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:56:45,711 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:56:47,712 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:56:49,713 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:56:51,714 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:56:52,402 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:56:52,402 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:56:53,715 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:56:55,716 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:56:57,717 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:56:59,718 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:01,719 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:03,720 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:04,162 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 22:57:05,721 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:07,594 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-29 22:57:07,594 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:57:07,722 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:10,724 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:12,725 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:14,726 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:16,727 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:18,728 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:20,729 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:22,730 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:22,750 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:57:22,750 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:57:24,731 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:26,732 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:28,733 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:30,734 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:32,735 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:34,226 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 22:57:34,736 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:36,737 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:37,900 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:57:37,901 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:57:38,738 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:40,740 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:42,741 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:44,742 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:46,743 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:48,744 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:50,745 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:52,746 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:53,042 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:57:53,042 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:57:54,747 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:56,748 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:57:58,749 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:00,750 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 
22:58:02,751 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:04,302 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 22:58:04,752 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:06,753 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:08,177 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:58:08,177 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:58:08,754 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:10,755 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:12,756 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:14,757 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:16,758 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:18,760 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 
22:58:20,761 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:22,762 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:23,321 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:58:23,321 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:58:24,763 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:26,764 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:28,765 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:30,766 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:32,768 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:34,388 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 22:58:34,769 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:36,770 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 
22:58:38,465 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:58:38,490 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:58:38,771 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:40,772 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:42,773 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:44,774 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:46,776 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:48,777 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:50,778 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:52,779 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:53,630 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:58:53,630 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:58:54,780 INFO Thread-8 :3550240 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:56,781 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:58:58,782 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:00,783 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:02,784 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:04,464 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 22:59:04,785 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:06,786 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:08,769 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:59:08,769 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:59:08,787 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:10,788 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:12,789 INFO Thread-8 :3550240 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:14,790 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:16,791 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:18,793 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:20,794 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:22,795 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:23,933 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:59:23,934 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:59:24,796 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:26,797 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:28,798 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:30,799 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:32,800 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:34,535 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 22:59:34,801 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:36,803 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:38,804 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:39,074 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:59:39,075 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:59:40,806 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:42,807 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:44,808 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:46,809 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:48,810 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:50,811 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:53,812 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:54,245 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 22:59:54,245 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 22:59:55,813 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:57,814 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 22:59:59,815 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:01,816 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:03,817 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:04,620 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:00:05,818 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:07,819 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:09,382 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:00:09,383 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:00:09,821 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:11,822 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:13,823 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:15,824 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:17,828 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:19,829 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:21,830 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:23,831 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:24,522 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:00:24,523 DEBUG SenderThread:3550240 [sender.py:send_request():248] 
send_request: stop_status +2022-07-29 23:00:25,832 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:27,833 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:29,834 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:31,836 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:33,837 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:34,711 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:00:35,838 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:37,839 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:39,665 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:00:39,665 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:00:39,840 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:41,841 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:43,842 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:45,843 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:47,844 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:49,845 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:51,846 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:53,848 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:54,828 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:00:54,828 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:00:55,849 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:57,850 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:00:59,851 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 
23:01:01,852 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:03,853 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:04,791 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:01:05,854 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:07,855 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:09,856 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:09,977 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:01:09,977 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:01:11,857 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:13,858 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:15,859 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:17,860 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 
23:01:19,861 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:21,862 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:23,863 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:25,185 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:01:25,186 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:01:25,864 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:27,870 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:29,866 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:31,866 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:33,867 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:34,866 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:01:35,868 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 
23:01:37,869 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:39,870 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:40,810 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:01:40,811 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:01:41,871 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:43,872 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:45,874 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:47,875 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:49,878 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:51,879 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:53,880 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:55,881 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:55,948 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:01:55,948 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:01:57,883 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:01:59,884 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:02:01,885 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:02:03,886 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:02:04,951 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:02:05,887 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:02:07,888 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:02:09,889 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:02:11,183 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:02:11,183 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:02:11,891 INFO Thread-8 :3550240 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:02:13,892 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:02:15,892 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:02:17,893 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:02:26,338 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:02:26,338 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:02:35,025 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:02:41,474 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:02:41,475 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:02:55,909 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:02:56,613 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:02:56,614 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:02:57,910 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:02:59,911 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:01,912 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:03,913 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:05,097 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:03:05,914 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:07,915 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:09,916 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:11,780 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:03:11,780 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:03:11,917 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:13,917 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:15,918 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:17,919 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:19,920 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:21,923 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:23,922 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:25,923 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:26,924 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:03:26,924 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:03:33,927 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:35,167 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:03:35,928 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:37,928 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:39,929 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:03:42,076 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-29 23:03:42,076 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:03:57,218 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:03:57,218 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:04:05,245 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:04:09,941 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:12,375 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:04:12,376 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:04:20,946 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:22,947 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:24,948 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:26,949 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:27,518 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:04:27,518 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:04:28,950 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:30,951 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:32,954 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:34,955 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:35,322 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:04:36,956 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:38,957 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:40,958 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:42,666 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:04:42,666 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:04:42,959 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:44,960 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:46,961 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:48,963 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:50,964 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:52,965 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:54,966 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:56,967 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:04:57,814 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:04:57,814 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:04:58,968 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:00,969 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:02,970 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:04,971 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 
23:05:05,420 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:05:06,972 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:08,973 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:10,974 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:12,976 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:13,306 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:05:13,306 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:05:14,977 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:16,978 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:18,979 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:20,980 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:22,981 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 
23:05:24,982 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:26,983 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:28,447 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:05:28,447 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:05:28,984 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:30,985 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:32,986 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:34,987 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:35,527 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:05:36,988 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:38,989 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:05:40,990 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 
23:05:43,648 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:05:43,794 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:05:58,934 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:05:58,935 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:06:05,604 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:06:14,074 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:06:14,075 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:06:23,008 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:06:29,357 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:06:29,358 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:06:30,012 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:06:35,682 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:06:36,014 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:06:42,017 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:06:44,577 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:06:44,578 DEBUG SenderThread:3550240 
[sender.py:send_request():248] send_request: stop_status +2022-07-29 23:06:48,019 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:06:56,023 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:06:57,848 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:06:57,848 DEBUG SenderThread:3550240 [sender.py:send():234] send: telemetry +2022-07-29 23:06:57,848 DEBUG SenderThread:3550240 [sender.py:send():234] send: exit +2022-07-29 23:06:57,849 INFO SenderThread:3550240 [sender.py:send_exit():366] handling exit code: 1 +2022-07-29 23:06:57,850 INFO SenderThread:3550240 [sender.py:send_exit():368] handling runtime: 714 +2022-07-29 23:06:57,851 INFO SenderThread:3550240 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 23:06:57,851 INFO SenderThread:3550240 [sender.py:send_exit():374] send defer +2022-07-29 23:06:57,851 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:06:57,852 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:06:57,852 INFO HandlerThread:3550240 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-29 23:06:57,853 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: defer +2022-07-29 23:06:57,853 INFO SenderThread:3550240 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-29 23:06:57,853 INFO SenderThread:3550240 [sender.py:transition_state():387] send defer: 1 +2022-07-29 23:06:57,853 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:06:57,853 INFO HandlerThread:3550240 [handler.py:handle_request_defer():147] handle defer: 1 
+2022-07-29 23:06:57,937 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: defer +2022-07-29 23:06:57,937 INFO SenderThread:3550240 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-29 23:06:57,937 INFO SenderThread:3550240 [sender.py:transition_state():387] send defer: 2 +2022-07-29 23:06:57,938 DEBUG SenderThread:3550240 [sender.py:send():234] send: stats +2022-07-29 23:06:57,938 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:06:57,938 INFO HandlerThread:3550240 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-29 23:06:57,938 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: defer +2022-07-29 23:06:57,939 INFO SenderThread:3550240 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-29 23:06:57,939 INFO SenderThread:3550240 [sender.py:transition_state():387] send defer: 3 +2022-07-29 23:06:57,939 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:06:57,939 INFO HandlerThread:3550240 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-29 23:06:57,939 DEBUG SenderThread:3550240 [sender.py:send():234] send: summary +2022-07-29 23:06:57,939 INFO SenderThread:3550240 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 23:06:57,940 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: defer +2022-07-29 23:06:57,940 INFO SenderThread:3550240 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-29 23:06:57,940 INFO SenderThread:3550240 [sender.py:transition_state():387] send defer: 4 +2022-07-29 23:06:57,940 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:06:57,940 INFO HandlerThread:3550240 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-29 23:06:57,940 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: defer 
+2022-07-29 23:06:57,940 INFO SenderThread:3550240 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-29 23:06:57,954 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:06:58,024 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/wandb-summary.json +2022-07-29 23:06:58,024 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:06:58,119 INFO SenderThread:3550240 [sender.py:transition_state():387] send defer: 5 +2022-07-29 23:06:58,120 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:06:58,120 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:06:58,120 INFO HandlerThread:3550240 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-29 23:06:58,120 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: defer +2022-07-29 23:06:58,120 INFO SenderThread:3550240 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-29 23:06:58,120 INFO SenderThread:3550240 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-29 23:06:58,221 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:06:59,024 INFO Thread-8 :3550240 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/config.yaml +2022-07-29 23:06:59,025 INFO SenderThread:3550240 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files +2022-07-29 23:06:59,025 INFO SenderThread:3550240 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/config.yaml config.yaml 
+2022-07-29 23:06:59,026 INFO SenderThread:3550240 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/requirements.txt requirements.txt +2022-07-29 23:06:59,026 INFO SenderThread:3550240 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log output.log +2022-07-29 23:06:59,026 INFO SenderThread:3550240 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/wandb-summary.json wandb-summary.json +2022-07-29 23:06:59,026 INFO SenderThread:3550240 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/wandb-metadata.json wandb-metadata.json +2022-07-29 23:06:59,032 INFO SenderThread:3550240 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-29 23:06:59,032 INFO SenderThread:3550240 [sender.py:transition_state():387] send defer: 6 +2022-07-29 23:06:59,032 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:06:59,033 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:06:59,033 INFO HandlerThread:3550240 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-29 23:06:59,034 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: defer +2022-07-29 23:06:59,034 INFO SenderThread:3550240 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-29 23:06:59,034 INFO SenderThread:3550240 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 23:06:59,134 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:06:59,134 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 
23:06:59,236 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:06:59,236 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:06:59,337 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:06:59,338 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:06:59,439 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:06:59,440 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:06:59,522 INFO Thread-13 :3550240 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/config.yaml +2022-07-29 23:06:59,541 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:06:59,541 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:06:59,582 INFO Thread-16 :3550240 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/wandb-summary.json +2022-07-29 23:06:59,603 INFO Thread-14 :3550240 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/requirements.txt +2022-07-29 23:06:59,643 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:06:59,643 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:06:59,733 INFO Thread-15 :3550240 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/files/output.log +2022-07-29 23:06:59,744 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:06:59,745 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit 
+2022-07-29 23:06:59,846 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:06:59,846 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:06:59,934 INFO Thread-7 :3550240 [sender.py:transition_state():387] send defer: 7 +2022-07-29 23:06:59,934 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:06:59,934 INFO HandlerThread:3550240 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-29 23:06:59,935 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: defer +2022-07-29 23:06:59,935 INFO SenderThread:3550240 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-29 23:06:59,948 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:07:00,405 INFO SenderThread:3550240 [sender.py:transition_state():387] send defer: 8 +2022-07-29 23:07:00,405 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:07:00,406 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:07:00,406 INFO HandlerThread:3550240 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-29 23:07:00,406 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: defer +2022-07-29 23:07:00,406 INFO SenderThread:3550240 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-29 23:07:00,406 INFO SenderThread:3550240 [sender.py:transition_state():387] send defer: 9 +2022-07-29 23:07:00,407 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:07:00,407 INFO HandlerThread:3550240 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-29 23:07:00,407 DEBUG SenderThread:3550240 [sender.py:send():234] send: final +2022-07-29 23:07:00,407 DEBUG SenderThread:3550240 [sender.py:send():234] send: footer 
+2022-07-29 23:07:00,407 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: defer +2022-07-29 23:07:00,407 INFO SenderThread:3550240 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-29 23:07:00,507 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:07:00,507 DEBUG SenderThread:3550240 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:07:00,507 INFO SenderThread:3550240 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 23:07:00,771 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: get_summary +2022-07-29 23:07:00,772 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-29 23:07:00,772 DEBUG HandlerThread:3550240 [handler.py:handle_request():130] handle_request: shutdown +2022-07-29 23:07:00,772 INFO HandlerThread:3550240 [handler.py:finish():731] shutting down handler +2022-07-29 23:07:01,407 INFO WriterThread:3550240 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/run-398l7dkj.wandb +2022-07-29 23:07:01,770 INFO SenderThread:3550240 [sender.py:finish():1070] shutting down sender +2022-07-29 23:07:01,771 INFO SenderThread:3550240 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 23:07:01,771 INFO SenderThread:3550240 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 23:07:01,774 INFO MainThread:3550240 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220729_225502-398l7dkj/logs/debug.log b/wandb/run-20220729_225502-398l7dkj/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..0cb91e910d9f15409f7f5790087dc5b25ac4ae5a --- /dev/null +++ b/wandb/run-20220729_225502-398l7dkj/logs/debug.log @@ -0,0 +1,148 @@ +2022-07-29 22:55:02,545 INFO MainThread:3548990 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 
'NbAiLab'} +2022-07-29 22:55:02,546 INFO MainThread:3548990 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-29 22:55:02,546 INFO MainThread:3548990 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/logs/debug.log +2022-07-29 22:55:02,546 INFO MainThread:3548990 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_225502-398l7dkj/logs/debug-internal.log +2022-07-29 22:55:02,546 INFO MainThread:3548990 [wandb_init.py:init():404] calling init triggers +2022-07-29 22:55:02,546 INFO MainThread:3548990 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-29 22:55:02,546 INFO MainThread:3548990 [wandb_init.py:init():460] starting backend +2022-07-29 22:55:02,546 INFO MainThread:3548990 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-29 22:55:02,591 INFO MainThread:3548990 [backend.py:ensure_launched():216] starting backend process... +2022-07-29 22:55:02,633 INFO MainThread:3548990 [backend.py:ensure_launched():221] started backend process with pid: 3550240 +2022-07-29 22:55:02,635 INFO MainThread:3548990 [wandb_init.py:init():469] backend started and connected +2022-07-29 22:55:02,648 INFO MainThread:3548990 [wandb_init.py:init():533] updated telemetry +2022-07-29 22:55:02,755 INFO MainThread:3548990 [wandb_init.py:init():563] communicating current version +2022-07-29 22:55:03,483 INFO MainThread:3548990 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! 
To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-29 22:55:03,484 INFO MainThread:3548990 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-29 22:55:03,661 INFO MainThread:3548990 [wandb_init.py:init():606] starting run threads in backend +2022-07-29 22:55:05,995 INFO MainThread:3548990 [wandb_run.py:_console_start():1810] atexit reg +2022-07-29 22:55:05,996 INFO MainThread:3548990 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-29 22:55:05,996 INFO MainThread:3548990 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-29 22:55:05,998 INFO MainThread:3548990 [wandb_run.py:_redirect():1745] Redirects installed. +2022-07-29 22:55:05,998 INFO MainThread:3548990 [wandb_init.py:init():633] run started, returning control to user process +2022-07-29 23:06:55,326 INFO MainThread:3548990 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-29 23:06:55,330 INFO MainThread:3548990 [wandb_run.py:_restore():1752] restore +2022-07-29 23:06:57,852 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73918 + total_bytes: 73918 +} + +2022-07-29 23:06:58,120 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73918 + total_bytes: 73918 +} + +2022-07-29 23:06:59,033 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73918 + total_bytes: 332656 +} + +2022-07-29 23:06:59,135 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73918 + total_bytes: 332656 +} + +2022-07-29 23:06:59,237 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got 
exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 332656 + total_bytes: 332656 +} + +2022-07-29 23:06:59,339 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 332656 + total_bytes: 332656 +} + +2022-07-29 23:06:59,440 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 332656 + total_bytes: 332656 +} + +2022-07-29 23:06:59,542 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 332656 + total_bytes: 332656 +} + +2022-07-29 23:06:59,644 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 332656 + total_bytes: 332656 +} + +2022-07-29 23:06:59,745 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 332656 + total_bytes: 332656 +} + +2022-07-29 23:06:59,847 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 332656 + total_bytes: 332656 +} + +2022-07-29 23:07:00,406 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 332656 + total_bytes: 332656 +} + +2022-07-29 23:07:00,771 INFO MainThread:3548990 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 332656 + total_bytes: 332656 +} +local_info { +} + +2022-07-29 23:07:02,394 INFO MainThread:3548990 
[wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220729_225502-398l7dkj/run-398l7dkj.wandb b/wandb/run-20220729_225502-398l7dkj/run-398l7dkj.wandb new file mode 100644 index 0000000000000000000000000000000000000000..0cf6b676aea3057f2db029cb42f492cf13e4e958 --- /dev/null +++ b/wandb/run-20220729_225502-398l7dkj/run-398l7dkj.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dbcd1f0f1caf2601a6ec76b266f1e0370a12efed9f5a3044abe1af8f770a741 +size 379814 diff --git a/wandb/run-20220729_231127-1dfdwyjl/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220729_231127-1dfdwyjl/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..d846a57f2375127bc169f9735151ad564083efac --- /dev/null +++ b/wandb/run-20220729_231127-1dfdwyjl/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1605 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220729_231127-1dfdwyjl/files/config.yaml b/wandb/run-20220729_231127-1dfdwyjl/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4efdc4427823db79e345df347c79507f79524662 --- /dev/null +++ b/wandb/run-20220729_231127-1dfdwyjl/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659136287 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220729_231127-1dfdwyjl/files/output.log b/wandb/run-20220729_231127-1dfdwyjl/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..1893b32522e6791e9a9fab73f422ae04f0e5c40d --- /dev/null +++ b/wandb/run-20220729_231127-1dfdwyjl/files/output.log @@ -0,0 +1,1556 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, 
+adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul29_23-11-24_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=128, +per_device_train_batch_size=128, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, 
+save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 76.19it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 444.16it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + "contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + 
"initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at /home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at 
/home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_hid', 'bias'), ('project_hid', 'kernel'), ('project_q', 'bias'), ('project_q', 'kernel'), ('quantizer', 'codevectors'), ('quantizer', 'weight_proj', 'bias'), ('quantizer', 'weight_proj', 'kernel')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). +Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'kernel'), ('lm_head', 'bias')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. 
+ param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 9193.91ex/s] +removing punctuation from train split #1: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8958.21ex/s] +removing punctuation from train split #2: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8807.85ex/s] +removing punctuation from train split #3: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8784.75ex/s] +removing punctuation from train split #4: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8832.04ex/s] +removing punctuation from train split #5: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8739.93ex/s] +removing punctuation from train split #6: 
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8775.53ex/s] +removing punctuation from train split #7: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8387.11ex/s] +removing punctuation from train split #8: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8735.11ex/s] +removing punctuation from train split #9: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8755.82ex/s] +removing punctuation from train split #10: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 9024.17ex/s] +removing punctuation from train split #11: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8645.13ex/s] +removing punctuation from train split #12: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8553.59ex/s] +removing punctuation from train split #13: 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8613.35ex/s] +removing punctuation from train split #7: 93%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8904/9523 [00:01<00:00, 7980.09ex/s] +removing punctuation from train split #8: 94%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8972/9523 [00:01<00:00, 8456.61ex/s] +removing punctuation from train split #9: 93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8871/9523 [00:01<00:00, 8764.44ex/s] +removing punctuation from train split #10: 55%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 5243/9523 [00:00<00:00, 8817.16ex/s] +removing punctuation from train split #11: 55%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 5209/9523 [00:00<00:00, 8834.64ex/s] +removing punctuation from train split #10: 64%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 6125/9523 [00:00<00:00, 8529.96ex/s] +removing punctuation from train split #10: 95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 9081/9523 [00:01<00:00, 9422.28ex/s] +removing punctuation from train split #11: 
83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 7871/9523 [00:00<00:00, 8692.94ex/s] +removing punctuation from train split #11: 92%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8753/9523 [00:01<00:00, 8729.01ex/s] +removing punctuation from train split #12: 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8650/9522 [00:01<00:00, 8652.34ex/s] +removing punctuation from train split #12: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉| 9520/9522 [00:01<00:00, 8664.29ex/s] +removing punctuation from train split #13: 91%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8703/9522 [00:01<00:00, 8638.43ex/s] +removing punctuation from train split #15: 72%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 6869/9522 [00:00<00:00, 8755.53ex/s] +removing punctuation from train split #14: 92%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 8760/9522 [00:01<00:00, 8884.44ex/s] +removing punctuation from train split #16: 73%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 6936/9522 
[00:00<00:00, 8748.75ex/s] +removing punctuation from train split #16: 82%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 7829/9522 [00:00<00:00, 8803.55ex/s] +removing punctuation from train split #18: 62%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 5905/9522 [00:00<00:00, 8674.73ex/s] +removing punctuation from train split #17: 81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 7699/9522 [00:00<00:00, 8739.44ex/s] +removing punctuation from train split #19: 62%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 5908/9522 [00:00<00:00, 8170.09ex/s] +removing punctuation from train split #20: 63%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 6038/9522 [00:00<00:00, 8747.93ex/s] +removing punctuation from train split #21: 53%|███████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 5019/9522 [00:00<00:00, 8166.74ex/s] +removing punctuation from train split #24: 26%|███████████████████████████████████████████████████▋ | 2512/9522 [00:00<00:00, 8445.21ex/s] +removing punctuation from train split #22: 54%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 5188/9522 [00:00<00:00, 8759.94ex/s] +removing punctuation from train split #24: 36%|█████████████████████████████████████████████████████████████████████▉ | 3396/9522 [00:00<00:00, 8597.94ex/s] +removing punctuation from train split #26: 17%|█████████████████████████████████▊ | 1645/9522 [00:00<00:00, 8275.63ex/s] +removing punctuation from train 
split #27: 7%|█████████████▉ | 676/9522 [00:00<00:01, 6755.98ex/s] +removing punctuation from train split #26: 26%|███████████████████████████████████████████████████▊ | 2520/9522 [00:00<00:00, 8489.28ex/s] +removing punctuation from train split #28: 8%|████████████████ | 776/9522 [00:00<00:01, 7757.84ex/s] +removing punctuation from train split #29: 8%|████████████████▍ | 794/9522 [00:00<00:01, 7931.14ex/s] +removing punctuation from train split #23: 93%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8848/9522 [00:01<00:00, 9067.40ex/s] +removing punctuation from train split #24: 84%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8010/9522 [00:00<00:00, 9242.91ex/s] +removing punctuation from train split #25: 74%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 7044/9522 [00:00<00:00, 9045.07ex/s] +removing punctuation from train split #26: 64%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 6123/9522 [00:00<00:00, 8965.65ex/s] +removing punctuation from train split #24: 94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8935/9522 [00:01<00:00, 8746.49ex/s] +removing punctuation from train split #25: 93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8900/9522 [00:01<00:00, 9158.21ex/s] +removing punctuation from train split #26: 
84%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7993/9522 [00:00<00:00, 8934.43ex/s] +removing punctuation from train split #26: 94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8967/9522 [00:01<00:00, 9175.35ex/s] +removing punctuation from train split #28: 65%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 6182/9522 [00:00<00:00, 9114.81ex/s] +removing punctuation from train split #27: 83%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7897/9522 [00:00<00:00, 8462.44ex/s] +removing punctuation from train split #27: 93%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8849/9522 [00:01<00:00, 8770.22ex/s] +removing punctuation from train split #28: 94%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8988/9522 [00:01<00:00, 8946.88ex/s] +removing punctuation from train split #29: 85%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8068/9522 [00:00<00:00, 8464.40ex/s] +removing punctuation from train split #29: 95%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 9032/9522 
[00:01<00:00, 8804.29ex/s] +removing punctuation from train split #31: 87%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 8242/9522 [00:00<00:00, 8701.18ex/s] +removing punctuation from train split #30: 95%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 9036/9522 [00:01<00:00, 8927.84ex/s] +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00030_of_00032.arrow9124/9522 [00:01<00:00, 8249.38ex/s] +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00031_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00000_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00001_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00002_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00003_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00004_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00005_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00006_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00007_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00008_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00009_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00010_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00011_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00012_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00013_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00014_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00015_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00016_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00017_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00018_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00019_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00020_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00021_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00022_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00023_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00024_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00025_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00026_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00027_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00028_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00031_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00000_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00001_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00002_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00003_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00004_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00005_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00006_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00007_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00008_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00009_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00010_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00011_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00012_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00013_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00014_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00015_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00016_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00017_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00018_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00019_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00020_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00021_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00022_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00023_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00024_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00025_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00026_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00027_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00028_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00031_of_00032.arrow +preprocess dataset #0: 0%|█ | 47/9497 [00:01<04:09, 37.86ex/s] +preprocess dataset #1: 0%|▌ | 26/9497 [00:01<04:53, 32.29ex/s] +preprocess dataset #2: 1%|█▋ | 74/9497 [00:02<03:01, 51.86ex/s] +preprocess dataset #3: 1%|█▏ | 50/9497 [00:01<03:12, 49.12ex/s] +preprocess dataset #4: 0%|▋ | 30/9497 [00:01<03:44, 42.23ex/s] +preprocess dataset #5: 1%|██▎ | 102/9497 [00:02<02:59, 52.44ex/s] +preprocess dataset #6: 1%|█▏ | 49/9497 [00:01<03:16, 48.06ex/s] +preprocess dataset #7: 0%|▎ | 12/9497 [00:00<08:35, 18.40ex/s] +preprocess dataset #8: 1%|█▌ | 65/9497 [00:01<03:11, 49.33ex/s] +preprocess dataset #9: 0%|▏ | 6/9497 [00:00<17:10, 9.21ex/s] +preprocess dataset #10: 1%|█▉ | 85/9497 [00:02<03:30, 44.78ex/s] +preprocess dataset #11: 0%|▉ | 38/9496 [00:01<03:52, 40.64ex/s] +preprocess dataset #12: 1%|█▋ | 73/9496 
[00:02<03:25, 45.85ex/s] +preprocess dataset #13: 0%|▉ | 39/9496 [00:01<03:54, 40.29ex/s] +preprocess dataset #14: 1%|█▊ | 80/9496 [00:02<04:16, 36.75ex/s] +preprocess dataset #15: 0%|▉ | 39/9496 [00:01<04:15, 36.95ex/s] +preprocess dataset #16: 1%|██ | 88/9496 [00:02<03:49, 41.04ex/s] +preprocess dataset #17: 1%|█▏ | 54/9496 [00:02<03:46, 41.74ex/s] +preprocess dataset #18: 1%|█▍ | 63/9496 [00:02<03:46, 41.61ex/s] +preprocess dataset #19: 0%|▌ | 27/9496 [00:01<04:45, 33.17ex/s] +preprocess dataset #20: 1%|██▏ | 96/9496 [00:03<03:59, 39.22ex/s] +preprocess dataset #21: 0%|█ | 44/9496 [00:01<04:26, 35.43ex/s] +preprocess dataset #22: 1%|█▊ | 76/9496 [00:03<04:27, 35.16ex/s] +preprocess dataset #23: 0%|▍ | 20/9496 [00:01<06:38, 23.79ex/s] +preprocess dataset #24: 1%|█▏ | 49/9496 [00:02<05:52, 26.77ex/s] +preprocess dataset #25: 0%|▍ | 17/9496 [00:01<07:34, 20.86ex/s] +preprocess dataset #26: 0%|▋ | 32/9496 [00:01<05:28, 28.82ex/s] +preprocess dataset #27: 1%|█▍ | 64/9496 [00:03<05:27, 28.79ex/s] +preprocess dataset #28: 0%|▊ | 33/9496 [00:01<06:14, 25.27ex/s] +preprocess dataset #29: 0%|█ | 47/9496 [00:02<05:43, 27.48ex/s] +preprocess dataset #30: 0%|▏ | 8/9496 [00:01<14:28, 10.92ex/s] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +preprocess dataset #30: 44%|███████████████████████████████████████████████████████████████████████████████████████████████▎ | 4172/9496 [02:25<02:41, 32.92ex/s] + + + + +preprocess dataset #30: 47%|██████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 4496/9496 [02:35<02:21, 35.37ex/s] + + + + + + + + + + + + + + + + +preprocess dataset #30: 58%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 5511/9496 [03:10<02:19, 28.60ex/s] + + + + + + + + + + + + + + + + + + + + +preprocess dataset #30: 
71%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 6764/9496 [03:52<01:32, 29.51ex/s] + + + + + + + + + + + + +preprocess dataset #30: 79%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7549/9496 [04:18<01:02, 31.36ex/s] + + +preprocess dataset #30: 81%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7731/9496 [04:24<00:57, 30.66ex/s] + + + + + + +preprocess dataset #30: 86%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8164/9496 [04:38<00:37, 35.53ex/s] +preprocess dataset #28: 89%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8419/9496 [04:42<00:33, 31.86ex/s] +preprocess dataset #29: 87%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8294/9496 [04:41<00:29, 40.13ex/s] +preprocess dataset #30: 87%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8230/9496 [04:40<00:35, 35.23ex/s] +preprocess dataset #30: 88%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8321/9496 [04:42<00:31, 
37.50ex/s] +preprocess dataset #30: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8400/9496 [04:44<00:25, 42.61ex/s] +preprocess dataset #29: 90%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 8520/9496 [04:47<00:27, 35.34ex/s] +preprocess dataset #30: 89%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8479/9496 [04:46<00:24, 41.29ex/s] +preprocess dataset #30: 90%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8524/9496 [04:47<00:24, 39.90ex/s] +preprocess dataset #29: 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 8677/9496 [04:52<00:22, 37.10ex/s] +preprocess dataset #30: 91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 8649/9496 [04:50<00:17, 47.74ex/s] + +preprocess dataset #29: 93%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8846/9496 [04:56<00:13, 49.05ex/s] +preprocess dataset #30: 
93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8823/9496 [04:54<00:14, 45.30ex/s] +preprocess dataset #29: 94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8924/9496 [04:57<00:12, 46.54ex/s] +preprocess dataset #30: 94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8900/9496 [04:56<00:12, 46.90ex/s] +preprocess dataset #30: 95%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8990/9496 [04:58<00:09, 51.57ex/s] +preprocess dataset #30: 95%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8996/9496 [04:58<00:10, 48.61ex/s] +preprocess dataset #30: 95%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 9061/9496 [05:00<00:08, 52.89ex/s] +preprocess dataset #27: 99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 9409/9496 [05:06<00:01, 61.40ex/s] +preprocess dataset #28: 
98%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 9299/9496 [05:05<00:04, 41.62ex/s] +preprocess dataset #29: 97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 9201/9496 [05:04<00:06, 43.92ex/s] +preprocess dataset #30: 97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 9195/9496 [05:02<00:04, 61.11ex/s] +preprocess dataset #28: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 9403/9496 [05:07<00:01, 73.94ex/s] +preprocess dataset #28: 99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9418/9496 [05:07<00:01, 61.93ex/s] +preprocess dataset #29: 98%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9309/9496 [05:06<00:02, 65.64ex/s] +preprocess dataset #30: 98%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 9320/9496 [05:04<00:02, 65.86ex/s] +preprocess dataset #30: 
99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9416/9496 [05:06<00:01, 56.68ex/s] +preprocess dataset #31: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏| 9459/9496 [05:06<00:00, 75.92ex/s] +preprocess dataset #4: 0%| | 0/1267 [00:00 to the vocabulary +Adding to the vocabulary +2022-07-29 23:23:21.061068: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2022-07-29 23:23:21.061132: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303) +/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +WARNING:huggingface_hub.repository:/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +run_flax_speech_recognition_ctc.py:333: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) +INFO:__main__:***** Running training ***** +INFO:__main__: Num examples = 302693 +INFO:__main__: Num Epochs = 40 +INFO:__main__: Instantaneous batch size per device = 128 +INFO:__main__: Num gradient accumulation steps = 1 +INFO:__main__: Total train batch size (w. 
parallel & distributed) = 1024 +INFO:__main__: Total optimization steps = 11800 +INFO:__main__: Gradient checkpointing: True +INFO:__main__: Use scan: False +INFO:__main__: Fuse matmuls: False +Epoch ... (1/40): 0%| | 0/40 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) + File "/data/flax/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 162, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2158, in cache_miss + out_tree, out_flat = f_pmapped_(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2034, in pmap_f + out = pxla.xla_pmap( + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2022, in bind + return map_bind(self, fun, *args, **params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2054, in map_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2025, in process + return trace.process_map(self, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 687, in process_call + return primitive.impl(f, *tracers, **params) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 829, in xla_pmap_impl + compiled_fun, fingerprint = parallel_callable( + File "/data/flax/lib/python3.8/site-packages/jax/linear_util.py", line 295, in memoized_fun + ans = call(fun, *args) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 860, in parallel_callable + pmap_executable = pmap_computation.compile() + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1137, in compile + self._executable = PmapExecutable.from_hlo(self._hlo, 
**self.compile_args) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1285, in from_hlo + compiled = dispatch.compile_or_get_cached( + File "/data/flax/lib/python3.8/site-packages/jax/_src/dispatch.py", line 899, in compile_or_get_cached + return backend_compile(backend, computation, compile_options, host_callbacks) + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/dispatch.py", line 843, in backend_compile + return backend.compile(built_c, compile_options=options) +jax._src.traceback_util.UnfilteredStackTrace: jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: XLA:TPU compile permanent error. Ran out of memory in memory space hbm. Used 51.78G of 15.48G hbm. Exceeded hbm capacity by 36.29G. +Total hbm usage >= 52.29G: + reserved 530.00M + program 51.78G + arguments 0B +Output size 0B; shares 0B with arguments. +Program hbm requirement 51.78G: + global 132.0K + scoped 72.08M + HLO temp 51.71G (99.1% utilization: Unpadded (49.63G) Padded (50.08G), 3.1% fragmentation (1.63G)) + Largest program allocations in hbm: + 1. Size: 7.81G + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/47/remat(core_fn)/47/attention/jit(_einsum)/dot_general[dimension_numbers=(((2,), (3,)), ((0, 1), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/wav2vec2-1b-npsc-nst/models/modeling_flax_wav2vec2.py" source_line=387 + Shape: f32[128,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 7.61G + Extra memory due to padding: 203.12M (1.0x expansion) + XLA label: fusion.29627 = fusion(copy.5391.remat2, bitcast.10361), kind=kOutput, calls=fused_computation.22171 + Allocation type: HLO temp + ========================== + 2. 
Size: 7.81G + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/47/remat(core_fn)/47/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: f32[128,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 7.61G + Extra memory due to padding: 203.12M (1.0x expansion) + XLA label: fusion.190.remat7 = fusion(bitcast.10358, bitcast.10356, copy.5386), kind=kOutput, calls=fused_computation.186.clone.clone.clone.clone.clone.clone.clone + Allocation type: HLO temp + ========================== + 3. Size: 624.38M + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.13721.remat_compressed = copy(fusion.27401) + Allocation type: HLO temp + ========================== + 4. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/22/22/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22380 = fusion(fusion.7480.remat, get-tuple-element.13471, get-tuple-element.13469, get-tuple-element.13473, ...(+5)), kind=kOutput, calls=fused_computation.20608 + Allocation type: HLO temp + ========================== + 5. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/44/44/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22336 = fusion(fusion.7414.remat, get-tuple-element.13855, get-tuple-element.13853, get-tuple-element.13857, ...(+5)), kind=kOutput, calls=fused_computation.20564 + Allocation type: HLO temp + ========================== + 6. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/39/39/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22346 = fusion(fusion.7429.remat, get-tuple-element.13759, get-tuple-element.13757, get-tuple-element.13761, ...(+5)), kind=kOutput, calls=fused_computation.20574 + Allocation type: HLO temp + ========================== + 7. Size: 624.38M + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: copy.8318 = copy(slice.408) + Allocation type: HLO temp + ========================== + 8. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/feature_projection/projection/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22426 = fusion(copy.8318, get-tuple-element.13989, fusion.10472, get-tuple-element.10358, ...(+9)), kind=kOutput, calls=fused_computation.20654 + Allocation type: HLO temp + ========================== + 9. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/46/46/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22332 = fusion(fusion.7408.remat, get-tuple-element.13887, get-tuple-element.13885, get-tuple-element.13889, ...(+5)), kind=kOutput, calls=fused_computation.20560 + Allocation type: HLO temp + ========================== + 10. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22424 = fusion(get-tuple-element.11275, get-tuple-element.13231, get-tuple-element.13229, get-tuple-element.13233, ...(+5)), kind=kOutput, calls=fused_computation.20652 + Allocation type: HLO temp + ========================== + 11. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/1/1/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22422 = fusion(fusion.7543.remat, get-tuple-element.13247, get-tuple-element.13245, get-tuple-element.13249, ...(+5)), kind=kOutput, calls=fused_computation.20650 + Allocation type: HLO temp + ========================== + 12. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/2/2/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22420 = fusion(fusion.7540.remat, get-tuple-element.13423, get-tuple-element.13421, get-tuple-element.13425, ...(+5)), kind=kOutput, calls=fused_computation.20648 + Allocation type: HLO temp + ========================== + 13. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/3/3/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22418 = fusion(fusion.7537.remat, get-tuple-element.13599, get-tuple-element.13597, get-tuple-element.13601, ...(+5)), kind=kOutput, calls=fused_computation.20646 + Allocation type: HLO temp + ========================== + 14. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/4/4/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22416 = fusion(fusion.7534.remat, get-tuple-element.13775, get-tuple-element.13773, get-tuple-element.13777, ...(+5)), kind=kOutput, calls=fused_computation.20644 + Allocation type: HLO temp + ========================== + 15. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/5/5/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22414 = fusion(fusion.7531.remat, get-tuple-element.13919, get-tuple-element.13917, get-tuple-element.13921, ...(+5)), kind=kOutput, calls=fused_computation.20642 + Allocation type: HLO temp + ========================== + 16. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/6/6/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22412 = fusion(fusion.7528.remat, get-tuple-element.13935, get-tuple-element.13933, get-tuple-element.13937, ...(+5)), kind=kOutput, calls=fused_computation.20640 + Allocation type: HLO temp + ========================== + 17. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/7/7/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22410 = fusion(fusion.7525.remat, get-tuple-element.13951, get-tuple-element.13949, get-tuple-element.13953, ...(+5)), kind=kOutput, calls=fused_computation.20638 + Allocation type: HLO temp + ========================== + 18. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/8/8/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22408 = fusion(fusion.7522.remat, get-tuple-element.13967, get-tuple-element.13965, get-tuple-element.13969, ...(+5)), kind=kOutput, calls=fused_computation.20636 + Allocation type: HLO temp + ========================== + 19. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/9/9/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22406 = fusion(fusion.7519.remat, get-tuple-element.13983, get-tuple-element.13981, get-tuple-element.13985, ...(+5)), kind=kOutput, calls=fused_computation.20634 + Allocation type: HLO temp + ========================== + 20. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/10/10/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22404 = fusion(fusion.7516.remat, get-tuple-element.13263, get-tuple-element.13261, get-tuple-element.13265, ...(+5)), kind=kOutput, calls=fused_computation.20632 + Allocation type: HLO temp + ========================== +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1605, in + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) +jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: XLA:TPU compile permanent error. Ran out of memory in memory space hbm. Used 51.78G of 15.48G hbm. Exceeded hbm capacity by 36.29G. +Total hbm usage >= 52.29G: + reserved 530.00M + program 51.78G + arguments 0B +Output size 0B; shares 0B with arguments. +Program hbm requirement 51.78G: + global 132.0K + scoped 72.08M + HLO temp 51.71G (99.1% utilization: Unpadded (49.63G) Padded (50.08G), 3.1% fragmentation (1.63G)) + Largest program allocations in hbm: + 1. 
Size: 7.81G + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/47/remat(core_fn)/47/attention/jit(_einsum)/dot_general[dimension_numbers=(((2,), (3,)), ((0, 1), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/wav2vec2-1b-npsc-nst/models/modeling_flax_wav2vec2.py" source_line=387 + Shape: f32[128,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 7.61G + Extra memory due to padding: 203.12M (1.0x expansion) + XLA label: fusion.29627 = fusion(copy.5391.remat2, bitcast.10361), kind=kOutput, calls=fused_computation.22171 + Allocation type: HLO temp + ========================== + 2. Size: 7.81G + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/47/remat(core_fn)/47/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: f32[128,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 7.61G + Extra memory due to padding: 203.12M (1.0x expansion) + XLA label: fusion.190.remat7 = fusion(bitcast.10358, bitcast.10356, copy.5386), kind=kOutput, calls=fused_computation.186.clone.clone.clone.clone.clone.clone.clone + Allocation type: HLO temp + ========================== + 3. Size: 624.38M + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.13721.remat_compressed = copy(fusion.27401) + Allocation type: HLO temp + ========================== + 4. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/22/22/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22380 = fusion(fusion.7480.remat, get-tuple-element.13471, get-tuple-element.13469, get-tuple-element.13473, ...(+5)), kind=kOutput, calls=fused_computation.20608 + Allocation type: HLO temp + ========================== + 5. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/44/44/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22336 = fusion(fusion.7414.remat, get-tuple-element.13855, get-tuple-element.13853, get-tuple-element.13857, ...(+5)), kind=kOutput, calls=fused_computation.20564 + Allocation type: HLO temp + ========================== + 6. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/39/39/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22346 = fusion(fusion.7429.remat, get-tuple-element.13759, get-tuple-element.13757, get-tuple-element.13761, ...(+5)), kind=kOutput, calls=fused_computation.20574 + Allocation type: HLO temp + ========================== + 7. 
Size: 624.38M + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: copy.8318 = copy(slice.408) + Allocation type: HLO temp + ========================== + 8. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/feature_projection/projection/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22426 = fusion(copy.8318, get-tuple-element.13989, fusion.10472, get-tuple-element.10358, ...(+9)), kind=kOutput, calls=fused_computation.20654 + Allocation type: HLO temp + ========================== + 9. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/46/46/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22332 = fusion(fusion.7408.remat, get-tuple-element.13887, get-tuple-element.13885, get-tuple-element.13889, ...(+5)), kind=kOutput, calls=fused_computation.20560 + Allocation type: HLO temp + ========================== + 10. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22424 = fusion(get-tuple-element.11275, get-tuple-element.13231, get-tuple-element.13229, get-tuple-element.13233, ...(+5)), kind=kOutput, calls=fused_computation.20652 + Allocation type: HLO temp + ========================== + 11. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/1/1/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22422 = fusion(fusion.7543.remat, get-tuple-element.13247, get-tuple-element.13245, get-tuple-element.13249, ...(+5)), kind=kOutput, calls=fused_computation.20650 + Allocation type: HLO temp + ========================== + 12. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/2/2/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22420 = fusion(fusion.7540.remat, get-tuple-element.13423, get-tuple-element.13421, get-tuple-element.13425, ...(+5)), kind=kOutput, calls=fused_computation.20648 + Allocation type: HLO temp + ========================== + 13. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/3/3/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22418 = fusion(fusion.7537.remat, get-tuple-element.13599, get-tuple-element.13597, get-tuple-element.13601, ...(+5)), kind=kOutput, calls=fused_computation.20646 + Allocation type: HLO temp + ========================== + 14. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/4/4/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22416 = fusion(fusion.7534.remat, get-tuple-element.13775, get-tuple-element.13773, get-tuple-element.13777, ...(+5)), kind=kOutput, calls=fused_computation.20644 + Allocation type: HLO temp + ========================== + 15. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/5/5/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22414 = fusion(fusion.7531.remat, get-tuple-element.13919, get-tuple-element.13917, get-tuple-element.13921, ...(+5)), kind=kOutput, calls=fused_computation.20642 + Allocation type: HLO temp + ========================== + 16. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/6/6/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22412 = fusion(fusion.7528.remat, get-tuple-element.13935, get-tuple-element.13933, get-tuple-element.13937, ...(+5)), kind=kOutput, calls=fused_computation.20640 + Allocation type: HLO temp + ========================== + 17. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/7/7/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22410 = fusion(fusion.7525.remat, get-tuple-element.13951, get-tuple-element.13949, get-tuple-element.13953, ...(+5)), kind=kOutput, calls=fused_computation.20638 + Allocation type: HLO temp + ========================== + 18. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/8/8/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22408 = fusion(fusion.7522.remat, get-tuple-element.13967, get-tuple-element.13965, get-tuple-element.13969, ...(+5)), kind=kOutput, calls=fused_computation.20636 + Allocation type: HLO temp + ========================== + 19. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/9/9/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22406 = fusion(fusion.7519.remat, get-tuple-element.13983, get-tuple-element.13981, get-tuple-element.13985, ...(+5)), kind=kOutput, calls=fused_computation.20634 + Allocation type: HLO temp + ========================== + 20. Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/10/10/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[128,999,1280]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.22404 = fusion(fusion.7516.remat, get-tuple-element.13263, get-tuple-element.13261, get-tuple-element.13265, ...(+5)), kind=kOutput, calls=fused_computation.20632 + Allocation type: HLO temp + ========================== \ No newline at end of file diff --git a/wandb/run-20220729_231127-1dfdwyjl/files/requirements.txt b/wandb/run-20220729_231127-1dfdwyjl/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ef78cbdea431c3d66bc5af51443394cb7955eab --- /dev/null +++ b/wandb/run-20220729_231127-1dfdwyjl/files/requirements.txt @@ -0,0 +1,158 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 
+colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +exceptiongroup==1.0.0rc8 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +hypothesis==6.53.0 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +jiwer==2.3.0 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.4.0 +pygments==2.11.1 +pygtrie==2.5.0 +pyparsing==3.0.6 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sortedcontainers==2.4.0 
+soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220729_231127-1dfdwyjl/files/wandb-metadata.json b/wandb/run-20220729_231127-1dfdwyjl/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..49ba9fbd8b3a20b020fba53a66523f5898978f6a --- /dev/null +++ b/wandb/run-20220729_231127-1dfdwyjl/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-29T23:11:31.214337", + "startedAt": "2022-07-29T23:11:27.767847", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=./", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=128", + "--per_device_eval_batch_size=128", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + 
"--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220729_231127-1dfdwyjl/files/wandb-summary.json b/wandb/run-20220729_231127-1dfdwyjl/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..0e9bee84d7792f9ed29dd896dbf35c24f9be20ae --- /dev/null +++ b/wandb/run-20220729_231127-1dfdwyjl/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 1042}} \ No newline at end of file diff --git a/wandb/run-20220729_231127-1dfdwyjl/logs/debug-internal.log b/wandb/run-20220729_231127-1dfdwyjl/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..00862f1e1cbace2cb76a5e62ee06c815e759083b --- /dev/null +++ b/wandb/run-20220729_231127-1dfdwyjl/logs/debug-internal.log @@ -0,0 +1,579 @@ +2022-07-29 23:11:28,666 INFO MainThread:2561171 [internal.py:wandb_internal():87] W&B internal server running at pid: 2561171, started at: 2022-07-29 23:11:28.666358 +2022-07-29 23:11:28,668 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] 
handle_request: check_version +2022-07-29 23:11:28,669 INFO WriterThread:2561171 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/run-1dfdwyjl.wandb +2022-07-29 23:11:28,669 DEBUG SenderThread:2561171 [sender.py:send():234] send: header +2022-07-29 23:11:28,670 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: check_version +2022-07-29 23:11:28,721 DEBUG SenderThread:2561171 [sender.py:send():234] send: run +2022-07-29 23:11:28,892 INFO SenderThread:2561171 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files +2022-07-29 23:11:28,892 INFO SenderThread:2561171 [sender.py:_start_run_threads():804] run started: 1dfdwyjl with start time 1659136287 +2022-07-29 23:11:28,893 DEBUG SenderThread:2561171 [sender.py:send():234] send: summary +2022-07-29 23:11:28,893 INFO SenderThread:2561171 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 23:11:28,895 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: run_start +2022-07-29 23:11:29,894 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/wandb-summary.json +2022-07-29 23:11:31,214 DEBUG HandlerThread:2561171 [meta.py:__init__():40] meta init +2022-07-29 23:11:31,214 DEBUG HandlerThread:2561171 [meta.py:__init__():54] meta init done +2022-07-29 23:11:31,214 DEBUG HandlerThread:2561171 [meta.py:probe():214] probe +2022-07-29 23:11:31,215 DEBUG HandlerThread:2561171 [meta.py:_setup_git():204] setup git +2022-07-29 23:11:31,252 DEBUG HandlerThread:2561171 [meta.py:_setup_git():211] setup git done +2022-07-29 23:11:31,253 DEBUG HandlerThread:2561171 [meta.py:_save_code():92] save code +2022-07-29 23:11:31,266 DEBUG HandlerThread:2561171 [meta.py:_save_code():113] save code done +2022-07-29 23:11:31,266 DEBUG HandlerThread:2561171 
[meta.py:_save_patches():130] save patches +2022-07-29 23:11:31,326 DEBUG HandlerThread:2561171 [meta.py:_save_patches():172] save patches done +2022-07-29 23:11:31,326 DEBUG HandlerThread:2561171 [meta.py:_save_pip():58] save pip +2022-07-29 23:11:31,327 DEBUG HandlerThread:2561171 [meta.py:_save_pip():72] save pip done +2022-07-29 23:11:31,327 DEBUG HandlerThread:2561171 [meta.py:probe():252] probe done +2022-07-29 23:11:31,330 DEBUG SenderThread:2561171 [sender.py:send():234] send: files +2022-07-29 23:11:31,330 INFO SenderThread:2561171 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-29 23:11:31,331 INFO SenderThread:2561171 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-29 23:11:31,338 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:11:31,338 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:11:31,837 INFO Thread-11 :2561171 [upload_job.py:push():137] Uploaded file /tmp/tmprdbwdzkmwandb/1i44h917-wandb-metadata.json +2022-07-29 23:11:31,895 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:11:31,895 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/requirements.txt +2022-07-29 23:11:31,895 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/wandb-metadata.json +2022-07-29 23:11:31,895 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/code/run_flax_speech_recognition_ctc.py +2022-07-29 23:11:31,895 INFO Thread-8 :2561171 
[dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/code +2022-07-29 23:11:32,052 INFO Thread-12 :2561171 [upload_job.py:push():137] Uploaded file /tmp/tmprdbwdzkmwandb/2vstmlwc-code/run_flax_speech_recognition_ctc.py +2022-07-29 23:11:33,896 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:11:35,897 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:11:37,897 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:11:39,898 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:11:45,901 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:11:46,519 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:11:46,519 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:11:47,902 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:11:59,299 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:11:59,907 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:12:01,743 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] 
handle_request: stop_status +2022-07-29 23:12:01,743 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:12:01,908 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:12:12,914 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:12:14,915 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:12:16,897 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:12:16,897 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:12:28,922 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:12:29,375 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:12:30,923 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:12:32,034 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:12:32,034 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:12:47,188 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:12:47,189 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:12:59,451 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:13:02,378 DEBUG HandlerThread:2561171 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:13:02,378 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:13:13,945 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:15,946 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:17,785 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:13:17,785 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:13:17,947 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:19,948 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:21,949 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:23,950 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:25,951 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:27,952 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:29,520 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 
23:13:29,953 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:31,954 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:32,981 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:13:32,981 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:13:33,955 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:35,956 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:37,957 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:39,958 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:41,959 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:43,960 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:45,961 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:47,962 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:48,142 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:13:48,142 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:13:49,963 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:51,964 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:53,965 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:55,966 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:57,967 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:13:59,601 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:13:59,968 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:01,969 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:03,286 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:14:03,287 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:14:03,970 INFO Thread-8 :2561171 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:05,971 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:07,972 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:09,973 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:11,974 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:14,056 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:16,057 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:18,134 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:18,441 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:14:18,441 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:14:20,135 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:22,136 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:24,137 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:26,138 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:28,139 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:29,967 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:14:30,140 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:32,141 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:33,728 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:14:33,728 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:14:34,141 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:36,142 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:38,144 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:40,145 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:42,145 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:44,146 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:46,147 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:48,148 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:48,887 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:14:48,888 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:14:50,149 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:52,150 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:54,151 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:56,152 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:14:58,153 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 
23:15:00,038 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:15:00,154 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:02,195 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:04,029 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:15:04,030 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:15:04,158 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:06,159 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:08,161 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:10,161 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:12,163 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:14,164 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:16,165 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 
23:15:18,166 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:19,192 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:15:19,192 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:15:20,167 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:22,169 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:24,170 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:26,171 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:28,172 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:30,122 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:15:30,173 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:32,174 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:34,176 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 
23:15:34,330 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:15:34,330 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:15:36,177 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:38,178 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:40,179 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:42,180 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:44,184 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:47,185 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:49,186 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:49,502 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:15:49,502 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:15:51,188 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:53,190 INFO Thread-8 :2561171 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:55,192 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:57,192 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:15:59,194 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:00,208 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:16:01,196 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:03,197 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:04,655 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:16:04,655 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:16:05,198 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:07,199 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:09,200 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:11,201 INFO Thread-8 :2561171 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:13,202 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:15,203 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:17,204 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:19,205 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:19,858 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:16:19,859 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:16:21,206 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:23,207 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:25,207 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:27,208 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:29,210 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:30,303 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:16:31,211 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:33,212 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:35,054 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:16:35,054 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:16:35,213 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:37,214 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:39,215 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:41,216 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:43,217 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:45,218 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:47,219 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:49,220 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:50,201 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:16:50,201 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:16:51,221 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:53,222 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:55,223 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:57,224 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:16:59,225 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:00,386 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:17:01,226 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:03,227 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:05,228 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:05,345 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:17:05,345 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:17:07,229 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:09,231 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:11,232 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:13,233 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:15,234 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:17,235 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:19,236 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:20,489 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:17:20,490 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:17:21,237 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:23,238 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:25,239 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:27,240 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:29,241 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:30,478 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:17:31,242 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:33,243 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:35,245 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:35,630 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:17:35,631 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:17:37,246 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:39,247 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:41,248 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:43,249 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:45,250 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:47,251 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:49,254 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:50,775 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:17:50,776 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:17:51,255 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:53,256 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:55,257 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:17:57,258 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 
23:17:59,259 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:00,571 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:18:01,260 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:03,261 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:05,261 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:05,946 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:18:05,946 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:18:07,263 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:09,264 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:11,265 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:13,266 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:15,267 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 
23:18:17,268 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:19,269 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:21,108 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:18:21,108 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:18:21,272 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:23,273 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:25,274 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:27,275 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:29,275 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:30,646 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:18:31,276 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:33,278 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 
23:18:35,279 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:36,273 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:18:36,273 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:18:37,281 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:39,282 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:41,283 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:43,284 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:45,285 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:47,285 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:49,286 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:18:51,439 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:18:51,440 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:19:00,714 DEBUG SenderThread:2561171 
[sender.py:send():234] send: stats +2022-07-29 23:19:06,576 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:19:06,576 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:19:20,300 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:21,794 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:19:21,795 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:19:22,300 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:24,301 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:26,302 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:28,303 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:30,304 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:30,785 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:19:32,305 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:34,306 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:36,307 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:36,936 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:19:36,937 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:19:38,308 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:40,309 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:42,310 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:44,311 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:46,312 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:48,313 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:50,314 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:52,084 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:19:52,084 DEBUG SenderThread:2561171 [sender.py:send_request():248] 
send_request: stop_status +2022-07-29 23:19:54,370 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:56,370 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:19:58,371 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:20:00,372 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:20:00,889 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:20:02,373 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:20:07,220 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:20:07,221 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:20:22,356 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:20:22,356 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:20:30,956 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:20:37,669 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:20:37,670 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:20:40,389 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log 
+2022-07-29 23:20:42,390 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:20:44,391 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:20:46,392 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:20:48,392 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:20:50,393 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:20:52,394 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:20:52,864 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:20:52,865 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:20:54,395 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:20:56,396 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:20:58,397 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:00,398 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:01,034 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:21:02,399 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:04,399 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:06,400 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:08,007 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:21:08,007 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:21:08,402 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:10,404 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:12,405 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:14,406 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:16,407 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:18,408 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:20,409 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:22,410 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:23,145 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:21:23,146 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:21:24,411 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:26,412 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:28,413 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:30,414 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:31,116 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:21:32,414 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:34,415 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:36,417 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:38,285 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:21:38,286 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:21:38,418 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:40,419 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:42,420 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:44,421 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:46,422 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:48,423 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:50,424 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:52,425 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:53,463 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:21:53,464 DEBUG SenderThread:2561171 
[sender.py:send_request():248] send_request: stop_status +2022-07-29 23:21:54,426 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:56,427 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:21:58,430 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:22:00,431 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:22:01,191 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:22:08,606 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:22:08,606 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:22:23,745 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:22:23,745 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:22:31,267 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:22:38,973 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:22:38,974 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:22:43,448 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:22:45,449 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:22:51,452 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:22:54,350 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:22:54,350 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:22:59,455 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:23:01,342 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:23:06,458 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:23:09,699 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:23:09,700 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:23:12,461 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:23:20,465 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:23:22,466 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:23:24,891 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:23:24,891 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:23:26,467 INFO Thread-8 
:2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:23:30,469 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:23:31,417 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:23:33,470 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:23:40,452 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:23:40,453 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:23:41,474 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:23:56,024 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:23:56,025 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:24:01,498 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:24:11,418 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:24:11,419 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:24:26,889 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:24:26,890 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:24:31,572 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:24:42,392 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 
23:24:42,392 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:24:58,028 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:24:58,028 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:25:01,645 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:25:13,350 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:25:13,350 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:25:20,516 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:25:28,503 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:25:28,504 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:25:31,721 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:25:43,990 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:25:43,990 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:25:53,530 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:25:59,162 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:25:59,162 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:26:01,533 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:26:01,794 DEBUG 
SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:26:03,534 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:26:14,326 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:26:14,326 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:26:29,473 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:26:29,473 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:26:31,872 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:26:44,613 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:26:44,613 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:26:59,751 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:26:59,751 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:27:01,950 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:27:14,936 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:27:14,937 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:27:30,071 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:27:30,071 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:27:32,027 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:27:45,210 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:27:45,210 DEBUG 
SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:28:00,347 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:28:00,347 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:28:02,106 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:28:15,480 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:28:15,480 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:28:30,782 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:28:30,782 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:28:32,182 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:28:45,925 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: stop_status +2022-07-29 23:28:45,925 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: stop_status +2022-07-29 23:28:50,603 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:28:51,678 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:51,679 DEBUG SenderThread:2561171 [sender.py:send():234] send: telemetry +2022-07-29 23:28:51,679 DEBUG SenderThread:2561171 [sender.py:send():234] send: exit +2022-07-29 23:28:51,679 INFO SenderThread:2561171 [sender.py:send_exit():366] handling exit code: 1 +2022-07-29 23:28:51,679 INFO SenderThread:2561171 [sender.py:send_exit():368] handling runtime: 1042 +2022-07-29 23:28:51,680 INFO SenderThread:2561171 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 23:28:51,680 INFO 
SenderThread:2561171 [sender.py:send_exit():374] send defer +2022-07-29 23:28:51,680 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:51,681 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:28:51,681 INFO HandlerThread:2561171 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-29 23:28:51,681 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: defer +2022-07-29 23:28:51,681 INFO SenderThread:2561171 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-29 23:28:51,681 INFO SenderThread:2561171 [sender.py:transition_state():387] send defer: 1 +2022-07-29 23:28:51,682 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:28:51,682 INFO HandlerThread:2561171 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-29 23:28:51,730 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: defer +2022-07-29 23:28:51,730 INFO SenderThread:2561171 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-29 23:28:51,730 INFO SenderThread:2561171 [sender.py:transition_state():387] send defer: 2 +2022-07-29 23:28:51,731 DEBUG SenderThread:2561171 [sender.py:send():234] send: stats +2022-07-29 23:28:51,731 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:28:51,731 INFO HandlerThread:2561171 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-29 23:28:51,731 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: defer +2022-07-29 23:28:51,731 INFO SenderThread:2561171 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-29 23:28:51,731 INFO SenderThread:2561171 [sender.py:transition_state():387] send defer: 3 +2022-07-29 23:28:51,732 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:28:51,732 INFO 
HandlerThread:2561171 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-29 23:28:51,732 DEBUG SenderThread:2561171 [sender.py:send():234] send: summary +2022-07-29 23:28:51,732 INFO SenderThread:2561171 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-29 23:28:51,732 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: defer +2022-07-29 23:28:51,732 INFO SenderThread:2561171 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-29 23:28:51,732 INFO SenderThread:2561171 [sender.py:transition_state():387] send defer: 4 +2022-07-29 23:28:51,732 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:28:51,732 INFO HandlerThread:2561171 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-29 23:28:51,733 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: defer +2022-07-29 23:28:51,733 INFO SenderThread:2561171 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-29 23:28:51,783 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:52,053 INFO SenderThread:2561171 [sender.py:transition_state():387] send defer: 5 +2022-07-29 23:28:52,053 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:52,054 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:28:52,054 INFO HandlerThread:2561171 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-29 23:28:52,054 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: defer +2022-07-29 23:28:52,054 INFO SenderThread:2561171 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-29 23:28:52,054 INFO SenderThread:2561171 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-29 23:28:52,155 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] 
handle_request: poll_exit +2022-07-29 23:28:52,604 INFO Thread-8 :2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:28:52,605 INFO SenderThread:2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/wandb-summary.json +2022-07-29 23:28:52,605 INFO SenderThread:2561171 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/config.yaml +2022-07-29 23:28:52,605 INFO SenderThread:2561171 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files +2022-07-29 23:28:52,605 INFO SenderThread:2561171 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/config.yaml config.yaml +2022-07-29 23:28:52,605 INFO SenderThread:2561171 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/requirements.txt requirements.txt +2022-07-29 23:28:52,606 INFO SenderThread:2561171 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log output.log +2022-07-29 23:28:52,606 INFO SenderThread:2561171 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/wandb-summary.json wandb-summary.json +2022-07-29 23:28:52,610 INFO SenderThread:2561171 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/wandb-metadata.json wandb-metadata.json +2022-07-29 23:28:52,610 INFO SenderThread:2561171 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-29 23:28:52,610 INFO 
SenderThread:2561171 [sender.py:transition_state():387] send defer: 6 +2022-07-29 23:28:52,610 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:52,611 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:28:52,611 INFO HandlerThread:2561171 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-29 23:28:52,611 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: defer +2022-07-29 23:28:52,612 INFO SenderThread:2561171 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-29 23:28:52,612 INFO SenderThread:2561171 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 23:28:52,713 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:52,713 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:52,816 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:52,816 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:52,918 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:52,918 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:53,020 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:53,020 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:53,079 INFO Thread-14 :2561171 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/requirements.txt +2022-07-29 23:28:53,111 INFO Thread-13 :2561171 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/config.yaml +2022-07-29 23:28:53,122 DEBUG HandlerThread:2561171 
[handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:53,122 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:53,141 INFO Thread-16 :2561171 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/wandb-summary.json +2022-07-29 23:28:53,224 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:53,224 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:53,325 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:53,326 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:53,341 INFO Thread-15 :2561171 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/files/output.log +2022-07-29 23:28:53,427 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:53,427 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:53,529 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:53,529 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:53,541 INFO Thread-7 :2561171 [sender.py:transition_state():387] send defer: 7 +2022-07-29 23:28:53,542 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:28:53,542 INFO HandlerThread:2561171 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-29 23:28:53,542 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: defer +2022-07-29 23:28:53,542 INFO SenderThread:2561171 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-29 23:28:53,631 DEBUG HandlerThread:2561171 
[handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:54,054 INFO SenderThread:2561171 [sender.py:transition_state():387] send defer: 8 +2022-07-29 23:28:54,055 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:54,055 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:28:54,055 INFO HandlerThread:2561171 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-29 23:28:54,056 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: defer +2022-07-29 23:28:54,056 INFO SenderThread:2561171 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-29 23:28:54,056 INFO SenderThread:2561171 [sender.py:transition_state():387] send defer: 9 +2022-07-29 23:28:54,056 DEBUG SenderThread:2561171 [sender.py:send():234] send: final +2022-07-29 23:28:54,056 DEBUG SenderThread:2561171 [sender.py:send():234] send: footer +2022-07-29 23:28:54,057 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: defer +2022-07-29 23:28:54,057 INFO HandlerThread:2561171 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-29 23:28:54,057 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: defer +2022-07-29 23:28:54,057 INFO SenderThread:2561171 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-29 23:28:54,157 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-29 23:28:54,157 DEBUG SenderThread:2561171 [sender.py:send_request():248] send_request: poll_exit +2022-07-29 23:28:54,157 INFO SenderThread:2561171 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 23:28:54,423 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: get_summary +2022-07-29 23:28:54,424 DEBUG HandlerThread:2561171 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-29 23:28:54,424 DEBUG 
HandlerThread:2561171 [handler.py:handle_request():130] handle_request: shutdown +2022-07-29 23:28:54,424 INFO HandlerThread:2561171 [handler.py:finish():731] shutting down handler +2022-07-29 23:28:55,057 INFO WriterThread:2561171 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/run-1dfdwyjl.wandb +2022-07-29 23:28:55,422 INFO SenderThread:2561171 [sender.py:finish():1070] shutting down sender +2022-07-29 23:28:55,422 INFO SenderThread:2561171 [file_pusher.py:finish():177] shutting down file pusher +2022-07-29 23:28:55,422 INFO SenderThread:2561171 [file_pusher.py:join():182] waiting for file pusher +2022-07-29 23:28:55,425 INFO MainThread:2561171 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220729_231127-1dfdwyjl/logs/debug.log b/wandb/run-20220729_231127-1dfdwyjl/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..c50882c2a3cd04c3b19e693b0fe1bd34c7daea65 --- /dev/null +++ b/wandb/run-20220729_231127-1dfdwyjl/logs/debug.log @@ -0,0 +1,157 @@ +2022-07-29 23:11:27,769 INFO MainThread:2559923 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-29 23:11:27,769 INFO MainThread:2559923 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-29 23:11:27,769 INFO MainThread:2559923 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/logs/debug.log +2022-07-29 23:11:27,769 INFO MainThread:2559923 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220729_231127-1dfdwyjl/logs/debug-internal.log +2022-07-29 23:11:27,769 INFO MainThread:2559923 [wandb_init.py:init():404] calling init triggers +2022-07-29 23:11:27,769 INFO MainThread:2559923 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-29 23:11:27,769 INFO MainThread:2559923 [wandb_init.py:init():460] starting 
backend +2022-07-29 23:11:27,770 INFO MainThread:2559923 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-29 23:11:27,816 INFO MainThread:2559923 [backend.py:ensure_launched():216] starting backend process... +2022-07-29 23:11:27,860 INFO MainThread:2559923 [backend.py:ensure_launched():221] started backend process with pid: 2561171 +2022-07-29 23:11:27,862 INFO MainThread:2559923 [wandb_init.py:init():469] backend started and connected +2022-07-29 23:11:27,876 INFO MainThread:2559923 [wandb_init.py:init():533] updated telemetry +2022-07-29 23:11:27,988 INFO MainThread:2559923 [wandb_init.py:init():563] communicating current version +2022-07-29 23:11:28,720 INFO MainThread:2559923 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-29 23:11:28,720 INFO MainThread:2559923 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-29 23:11:28,894 INFO MainThread:2559923 [wandb_init.py:init():606] starting run threads in backend +2022-07-29 23:11:31,333 INFO MainThread:2559923 [wandb_run.py:_console_start():1810] atexit reg +2022-07-29 23:11:31,334 INFO MainThread:2559923 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-29 23:11:31,335 INFO MainThread:2559923 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-29 23:11:31,337 INFO MainThread:2559923 [wandb_run.py:_redirect():1745] Redirects installed. 
+2022-07-29 23:11:31,337 INFO MainThread:2559923 [wandb_init.py:init():633] run started, returning control to user process +2022-07-29 23:28:49,333 INFO MainThread:2559923 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-29 23:28:49,339 INFO MainThread:2559923 [wandb_run.py:_restore():1752] restore +2022-07-29 23:28:51,681 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73918 + total_bytes: 73918 +} + +2022-07-29 23:28:52,054 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 1 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73918 + total_bytes: 73918 +} + +2022-07-29 23:28:52,612 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73918 + total_bytes: 372393 +} + +2022-07-29 23:28:52,715 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 73918 + total_bytes: 372393 +} + +2022-07-29 23:28:52,817 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 372393 + total_bytes: 372393 +} + +2022-07-29 23:28:52,919 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 372393 + total_bytes: 372393 +} + +2022-07-29 23:28:53,021 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 372393 + total_bytes: 372393 +} + +2022-07-29 23:28:53,123 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + 
other_count: 1 +} +pusher_stats { + uploaded_bytes: 372393 + total_bytes: 372393 +} + +2022-07-29 23:28:53,224 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 372393 + total_bytes: 372393 +} + +2022-07-29 23:28:53,326 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 372393 + total_bytes: 372393 +} + +2022-07-29 23:28:53,428 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 372393 + total_bytes: 372393 +} + +2022-07-29 23:28:53,530 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 372393 + total_bytes: 372393 +} + +2022-07-29 23:28:54,056 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 372393 + total_bytes: 372393 +} + +2022-07-29 23:28:54,422 INFO MainThread:2559923 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 372393 + total_bytes: 372393 +} +local_info { +} + +2022-07-29 23:28:56,009 INFO MainThread:2559923 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220729_231127-1dfdwyjl/run-1dfdwyjl.wandb b/wandb/run-20220729_231127-1dfdwyjl/run-1dfdwyjl.wandb new file mode 100644 index 0000000000000000000000000000000000000000..cfe62d5f00c04d5c6cdf40ed10b4bb46f6b01776 --- /dev/null +++ b/wandb/run-20220729_231127-1dfdwyjl/run-1dfdwyjl.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:f7c5ac63cb533b27df989c5265415ce13e339d5f8c056e1f6c9a0fc3902ba3e4 +size 424833 diff --git a/wandb/run-20220730_074919-12xoayks/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220730_074919-12xoayks/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..d846a57f2375127bc169f9735151ad564083efac --- /dev/null +++ b/wandb/run-20220730_074919-12xoayks/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1605 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to `max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The maximum total sequence length for target text after tokenization. Sequences longer " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the evaluation data set split to use (via the datasets library). Defaults to 'validation'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220730_074919-12xoayks/files/config.yaml b/wandb/run-20220730_074919-12xoayks/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..795906cc3387404bc5273c1d23fc6ac7f3018d93 --- /dev/null +++ b/wandb/run-20220730_074919-12xoayks/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659167359 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220730_074919-12xoayks/files/diff.patch b/wandb/run-20220730_074919-12xoayks/files/diff.patch new file mode 100644 index 0000000000000000000000000000000000000000..39b46e1762f7e4b4f330f4ff487b597cc5496642 --- /dev/null +++ b/wandb/run-20220730_074919-12xoayks/files/diff.patch @@ -0,0 +1,10 @@ +diff --git a/.gitattributes b/.gitattributes +index 755356a..f0eef4b 100644 +--- a/.gitattributes ++++ b/.gitattributes +@@ -29,3 +29,4 @@ 
saved_model/**/* filter=lfs diff=lfs merge=lfs -text + *.zip filter=lfs diff=lfs merge=lfs -text + *.zstandard filter=lfs diff=lfs merge=lfs -text + *tfevents* filter=lfs diff=lfs merge=lfs -text ++*.wandb filter=lfs diff=lfs merge=lfs -text +\ No newline at end of file diff --git a/wandb/run-20220730_074919-12xoayks/files/output.log b/wandb/run-20220730_074919-12xoayks/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..1f32bffe1ae0a93a1b1289f1eea0fe5071c9d9fd --- /dev/null +++ b/wandb/run-20220730_074919-12xoayks/files/output.log @@ -0,0 +1,1578 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul30_07-49-15_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, 
+logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=32, +per_device_train_batch_size=32, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 76.71it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 439.12it/s] 
+WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + 
"contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at 
/home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_hid', 'bias'), ('project_q', 'kernel'), ('quantizer', 'weight_proj', 'bias'), ('quantizer', 'codevectors'), ('project_q', 'bias'), ('quantizer', 'weight_proj', 'kernel'), ('project_hid', 'kernel')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
+Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'kernel'), ('lm_head', 'bias')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 77%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7328/9523 [00:00<00:00, 6747.47ex/s] +removing punctuation from train split #1: 76%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7280/9523 [00:00<00:00, 8744.68ex/s] +removing punctuation from train split #2: 78%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 7416/9523 [00:00<00:00, 9086.47ex/s] +removing punctuation from train split #3: 68%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 6487/9523 [00:00<00:00, 9377.94ex/s] +removing punctuation from train split #4: 68%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 6434/9523 [00:00<00:00, 9148.65ex/s] +removing punctuation from train split #5: 27%|█████████████████████████████████████████████████████▌ | 2592/9523 [00:00<00:00, 8716.13ex/s] +removing punctuation from train split #6: 28%|██████████████████████████████████████████████████████▋ | 2641/9523 [00:00<00:00, 8868.89ex/s] 
+removing punctuation from train split #7: 27%|████████████████████████████████████████████████████▉ | 2557/9523 [00:00<00:00, 8604.97ex/s] +removing punctuation from train split #8: 18%|███████████████████████████████████▍ | 1716/9523 [00:00<00:00, 8626.61ex/s] +removing punctuation from train split #9: 17%|█████████████████████████████████▏ | 1606/9523 [00:00<00:00, 8123.25ex/s] +removing punctuation from train split #10: 9%|█████████████████▏ | 828/9523 [00:00<00:01, 8276.53ex/s] +removing punctuation from train split #11: 8%|████████████████▎ | 788/9523 [00:00<00:01, 7870.27ex/s] +removing punctuation from train split #12: 0%| | 0/9522 [00:00 to the vocabulary +Adding to the vocabulary +2022-07-30 08:01:17.653063: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2022-07-30 08:01:17.653114: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303) +/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +WARNING:huggingface_hub.repository:/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +run_flax_speech_recognition_ctc.py:333: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) +INFO:__main__:***** Running training ***** +INFO:__main__: Num examples = 302693 +INFO:__main__: Num Epochs = 40 +INFO:__main__: Instantaneous batch size per device = 32 +INFO:__main__: Num gradient accumulation steps = 1 +INFO:__main__: Total train batch size (w. 
parallel & distributed) = 256 +INFO:__main__: Total optimization steps = 47280 +INFO:__main__: Gradient checkpointing: True +INFO:__main__: Use scan: False +INFO:__main__: Fuse matmuls: False +Epoch ... (1/40): 0%| | 0/40 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) + File "/data/flax/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 162, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2158, in cache_miss + out_tree, out_flat = f_pmapped_(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2034, in pmap_f + out = pxla.xla_pmap( + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2022, in bind + return map_bind(self, fun, *args, **params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2054, in map_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2025, in process + return trace.process_map(self, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 687, in process_call + return primitive.impl(f, *tracers, **params) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 829, in xla_pmap_impl + compiled_fun, fingerprint = parallel_callable( + File "/data/flax/lib/python3.8/site-packages/jax/linear_util.py", line 295, in memoized_fun + ans = call(fun, *args) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 860, in parallel_callable + pmap_executable = pmap_computation.compile() + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1137, in compile + self._executable = PmapExecutable.from_hlo(self._hlo, 
**self.compile_args) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1285, in from_hlo + compiled = dispatch.compile_or_get_cached( + File "/data/flax/lib/python3.8/site-packages/jax/_src/dispatch.py", line 899, in compile_or_get_cached + return backend_compile(backend, computation, compile_options, host_callbacks) + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/dispatch.py", line 843, in backend_compile + return backend.compile(built_c, compile_options=options) +jax._src.traceback_util.UnfilteredStackTrace: jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: XLA:TPU compile permanent error. Ran out of memory in memory space hbm. Used 22.29G of 15.48G hbm. Exceeded hbm capacity by 6.81G. +Total hbm usage >= 22.81G: + reserved 530.00M + program 11.45G + arguments 10.84G +Output size 10.76G; shares 10.76G with arguments. +Program hbm requirement 11.45G: + global 244.0K + scoped 72.08M + HLO temp 11.38G (99.3% utilization: Unpadded (10.50G) Padded (10.58G), 7.1% fragmentation (826.39M)) + Largest program allocations in hbm: + 1. Size: 1.95G + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/34/remat(core_fn)/34/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: f32[32,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 1.90G + Extra memory due to padding: 50.78M (1.0x expansion) + XLA label: fusion.180.remat6 = fusion(bitcast.7587, bitcast.7585, fusion.14562), kind=kOutput, calls=fused_computation.176.clone.clone.clone.clone.clone.clone + Allocation type: HLO temp + ========================== + 2. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/34/remat(core_fn)/34/feed_forward/intermediate_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,5120]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.1570.remat = fusion(fusion.6368, get-tuple-element.21296, bitcast.11253), kind=kOutput, calls=fused_computation.1410.clone + Allocation type: HLO temp + ========================== + 3. Size: 312.19M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/34/remat(core_fn)/34/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (1,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[32,999,5120]{2,0,1:T(8,128)(2,1)} + Unpadded size: 312.19M + XLA label: fusion.24115 = fusion(fusion.1570.remat, get-tuple-element.21271, get-tuple-element.21270, get-tuple-element.21305, ...(+1)), kind=kOutput, calls=fused_computation.18872 + Allocation type: HLO temp + ========================== + 4. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/35/remat(core_fn)/35/layer_norm/add_any" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.7152 = fusion(get-tuple-element.9850, get-tuple-element.20844, get-tuple-element.10084, get-tuple-element.20843, ...(+3)), kind=kLoop, calls=fused_computation.6596 + Allocation type: HLO temp + ========================== + 5. 
Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/15/15/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20091 = fusion(fusion.6802.remat, get-tuple-element.13360, fusion.1630, bitcast.11123), kind=kOutput, calls=fused_computation.18657 + Allocation type: HLO temp + ========================== + 6. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/33/33/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20055 = fusion(fusion.6766.remat, get-tuple-element.13680, fusion.1612, bitcast.11087), kind=kOutput, calls=fused_computation.18621 + Allocation type: HLO temp + ========================== + 7. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/34/remat(core_fn)/34/attention/out_proj/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.6654.remat2 = fusion(get-tuple-element.21303, copy.14050.remat2, get-tuple-element.21290, bitcast.11252), kind=kOutput, calls=fused_computation.6098.clone.clone + Allocation type: HLO temp + ========================== + 8. 
Size: 156.09M + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: copy.16362 = copy(bitcast.14787) + Allocation type: HLO temp + ========================== + 9. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/layer_norm/reduce_sum[axes=(2,)]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20123 = fusion(fusion.6836, copy.16362, get-tuple-element.14006, fusion.8933, ...(+1)), kind=kLoop, calls=fused_computation.18689 + Allocation type: HLO temp + ========================== + 10. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/32/32/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20057 = fusion(fusion.6768.remat, get-tuple-element.13664, fusion.1613, bitcast.11089), kind=kOutput, calls=fused_computation.18623 + Allocation type: HLO temp + ========================== + 11. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20121 = fusion(get-tuple-element.11240, get-tuple-element.13248, fusion.1645, bitcast.11153), kind=kOutput, calls=fused_computation.18687 + Allocation type: HLO temp + ========================== + 12. 
Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/1/1/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20119 = fusion(get-tuple-element.11244, get-tuple-element.13264, fusion.1644, bitcast.11151), kind=kOutput, calls=fused_computation.18685 + Allocation type: HLO temp + ========================== + 13. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/2/2/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20117 = fusion(get-tuple-element.11248, get-tuple-element.13440, fusion.1643, bitcast.11149), kind=kOutput, calls=fused_computation.18683 + Allocation type: HLO temp + ========================== + 14. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/3/3/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20115 = fusion(get-tuple-element.11252, get-tuple-element.13616, fusion.1642, bitcast.11147), kind=kOutput, calls=fused_computation.18681 + Allocation type: HLO temp + ========================== + 15. 
Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/4/4/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20113 = fusion(get-tuple-element.11256, get-tuple-element.13792, fusion.1641, bitcast.11145), kind=kOutput, calls=fused_computation.18679 + Allocation type: HLO temp + ========================== + 16. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/5/5/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20111 = fusion(get-tuple-element.11260, get-tuple-element.13936, fusion.1640, bitcast.11143), kind=kOutput, calls=fused_computation.18677 + Allocation type: HLO temp + ========================== + 17. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/6/6/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20109 = fusion(get-tuple-element.11264, get-tuple-element.13952, fusion.1639, bitcast.11141), kind=kOutput, calls=fused_computation.18675 + Allocation type: HLO temp + ========================== + 18. 
Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/7/7/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20107 = fusion(get-tuple-element.11268, get-tuple-element.13968, fusion.1638, bitcast.11139), kind=kOutput, calls=fused_computation.18673 + Allocation type: HLO temp + ========================== + 19. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/8/8/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20105 = fusion(get-tuple-element.11272, get-tuple-element.13984, fusion.1637, bitcast.11137), kind=kOutput, calls=fused_computation.18671 + Allocation type: HLO temp + ========================== + 20. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/9/9/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20103 = fusion(get-tuple-element.11276, get-tuple-element.14000, fusion.1636, bitcast.11135), kind=kOutput, calls=fused_computation.18669 + Allocation type: HLO temp + ========================== +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. 
+-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1605, in + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) +jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: XLA:TPU compile permanent error. Ran out of memory in memory space hbm. Used 22.29G of 15.48G hbm. Exceeded hbm capacity by 6.81G. +Total hbm usage >= 22.81G: + reserved 530.00M + program 11.45G + arguments 10.84G +Output size 10.76G; shares 10.76G with arguments. +Program hbm requirement 11.45G: + global 244.0K + scoped 72.08M + HLO temp 11.38G (99.3% utilization: Unpadded (10.50G) Padded (10.58G), 7.1% fragmentation (826.39M)) + Largest program allocations in hbm: + 1. Size: 1.95G + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/34/remat(core_fn)/34/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: f32[32,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 1.90G + Extra memory due to padding: 50.78M (1.0x expansion) + XLA label: fusion.180.remat6 = fusion(bitcast.7587, bitcast.7585, fusion.14562), kind=kOutput, calls=fused_computation.176.clone.clone.clone.clone.clone.clone + Allocation type: HLO temp + ========================== + 2. 
Size: 624.38M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/34/remat(core_fn)/34/feed_forward/intermediate_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,5120]{2,0,1:T(8,128)} + Unpadded size: 624.38M + XLA label: fusion.1570.remat = fusion(fusion.6368, get-tuple-element.21296, bitcast.11253), kind=kOutput, calls=fused_computation.1410.clone + Allocation type: HLO temp + ========================== + 3. Size: 312.19M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/34/remat(core_fn)/34/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (1,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[32,999,5120]{2,0,1:T(8,128)(2,1)} + Unpadded size: 312.19M + XLA label: fusion.24115 = fusion(fusion.1570.remat, get-tuple-element.21271, get-tuple-element.21270, get-tuple-element.21305, ...(+1)), kind=kOutput, calls=fused_computation.18872 + Allocation type: HLO temp + ========================== + 4. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/35/remat(core_fn)/35/layer_norm/add_any" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.7152 = fusion(get-tuple-element.9850, get-tuple-element.20844, get-tuple-element.10084, get-tuple-element.20843, ...(+3)), kind=kLoop, calls=fused_computation.6596 + Allocation type: HLO temp + ========================== + 5. 
Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/15/15/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20091 = fusion(fusion.6802.remat, get-tuple-element.13360, fusion.1630, bitcast.11123), kind=kOutput, calls=fused_computation.18657 + Allocation type: HLO temp + ========================== + 6. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/33/33/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20055 = fusion(fusion.6766.remat, get-tuple-element.13680, fusion.1612, bitcast.11087), kind=kOutput, calls=fused_computation.18621 + Allocation type: HLO temp + ========================== + 7. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/34/remat(core_fn)/34/attention/out_proj/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.6654.remat2 = fusion(get-tuple-element.21303, copy.14050.remat2, get-tuple-element.21290, bitcast.11252), kind=kOutput, calls=fused_computation.6098.clone.clone + Allocation type: HLO temp + ========================== + 8. 
Size: 156.09M + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: copy.16362 = copy(bitcast.14787) + Allocation type: HLO temp + ========================== + 9. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/layer_norm/reduce_sum[axes=(2,)]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20123 = fusion(fusion.6836, copy.16362, get-tuple-element.14006, fusion.8933, ...(+1)), kind=kLoop, calls=fused_computation.18689 + Allocation type: HLO temp + ========================== + 10. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/32/32/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20057 = fusion(fusion.6768.remat, get-tuple-element.13664, fusion.1613, bitcast.11089), kind=kOutput, calls=fused_computation.18623 + Allocation type: HLO temp + ========================== + 11. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20121 = fusion(get-tuple-element.11240, get-tuple-element.13248, fusion.1645, bitcast.11153), kind=kOutput, calls=fused_computation.18687 + Allocation type: HLO temp + ========================== + 12. 
Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/1/1/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20119 = fusion(get-tuple-element.11244, get-tuple-element.13264, fusion.1644, bitcast.11151), kind=kOutput, calls=fused_computation.18685 + Allocation type: HLO temp + ========================== + 13. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/2/2/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20117 = fusion(get-tuple-element.11248, get-tuple-element.13440, fusion.1643, bitcast.11149), kind=kOutput, calls=fused_computation.18683 + Allocation type: HLO temp + ========================== + 14. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/3/3/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20115 = fusion(get-tuple-element.11252, get-tuple-element.13616, fusion.1642, bitcast.11147), kind=kOutput, calls=fused_computation.18681 + Allocation type: HLO temp + ========================== + 15. 
Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/4/4/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20113 = fusion(get-tuple-element.11256, get-tuple-element.13792, fusion.1641, bitcast.11145), kind=kOutput, calls=fused_computation.18679 + Allocation type: HLO temp + ========================== + 16. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/5/5/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20111 = fusion(get-tuple-element.11260, get-tuple-element.13936, fusion.1640, bitcast.11143), kind=kOutput, calls=fused_computation.18677 + Allocation type: HLO temp + ========================== + 17. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/6/6/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20109 = fusion(get-tuple-element.11264, get-tuple-element.13952, fusion.1639, bitcast.11141), kind=kOutput, calls=fused_computation.18675 + Allocation type: HLO temp + ========================== + 18. 
Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/7/7/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20107 = fusion(get-tuple-element.11268, get-tuple-element.13968, fusion.1638, bitcast.11139), kind=kOutput, calls=fused_computation.18673 + Allocation type: HLO temp + ========================== + 19. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/8/8/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20105 = fusion(get-tuple-element.11272, get-tuple-element.13984, fusion.1637, bitcast.11137), kind=kOutput, calls=fused_computation.18671 + Allocation type: HLO temp + ========================== + 20. 
Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/9/9/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[32,999,1280]{2,0,1:T(8,128)} + Unpadded size: 156.09M + XLA label: fusion.20103 = fusion(get-tuple-element.11276, get-tuple-element.14000, fusion.1636, bitcast.11135), kind=kOutput, calls=fused_computation.18669 + Allocation type: HLO temp + ========================== \ No newline at end of file diff --git a/wandb/run-20220730_074919-12xoayks/files/requirements.txt b/wandb/run-20220730_074919-12xoayks/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ef78cbdea431c3d66bc5af51443394cb7955eab --- /dev/null +++ b/wandb/run-20220730_074919-12xoayks/files/requirements.txt @@ -0,0 +1,158 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +exceptiongroup==1.0.0rc8 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +hypothesis==6.53.0 +idna==3.3 
+importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +jiwer==2.3.0 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.4.0 +pygments==2.11.1 +pygtrie==2.5.0 +pyparsing==3.0.6 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No 
newline at end of file diff --git a/wandb/run-20220730_074919-12xoayks/files/wandb-metadata.json b/wandb/run-20220730_074919-12xoayks/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..41ff4dfd696b810ef0e4d314ba8b17d3b68d086c --- /dev/null +++ b/wandb/run-20220730_074919-12xoayks/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-30T07:49:23.141477", + "startedAt": "2022-07-30T07:49:19.732765", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=./", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=32", + "--per_device_eval_batch_size=32", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": 
"https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220730_074919-12xoayks/files/wandb-summary.json b/wandb/run-20220730_074919-12xoayks/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..eb7e27891b1ad34db9f1f3ef6a20af17baa76819 --- /dev/null +++ b/wandb/run-20220730_074919-12xoayks/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 994}} \ No newline at end of file diff --git a/wandb/run-20220730_074919-12xoayks/logs/debug-internal.log b/wandb/run-20220730_074919-12xoayks/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..fbaf5f4190d3a5ccbade8b3a0a946d6eb296b5d8 --- /dev/null +++ b/wandb/run-20220730_074919-12xoayks/logs/debug-internal.log @@ -0,0 +1,526 @@ +2022-07-30 07:49:20,650 INFO MainThread:2049141 [internal.py:wandb_internal():87] W&B internal server running at pid: 2049141, started at: 2022-07-30 07:49:20.650404 +2022-07-30 07:49:20,652 INFO WriterThread:2049141 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/run-12xoayks.wandb +2022-07-30 07:49:20,652 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: check_version +2022-07-30 07:49:20,653 DEBUG SenderThread:2049141 [sender.py:send():234] send: header +2022-07-30 07:49:20,653 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: check_version +2022-07-30 07:49:20,693 DEBUG SenderThread:2049141 [sender.py:send():234] send: run +2022-07-30 07:49:20,858 INFO SenderThread:2049141 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files +2022-07-30 07:49:20,858 INFO SenderThread:2049141 
[sender.py:_start_run_threads():804] run started: 12xoayks with start time 1659167359 +2022-07-30 07:49:20,858 DEBUG SenderThread:2049141 [sender.py:send():234] send: summary +2022-07-30 07:49:20,859 INFO SenderThread:2049141 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 07:49:20,859 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: run_start +2022-07-30 07:49:21,863 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/wandb-summary.json +2022-07-30 07:49:23,141 DEBUG HandlerThread:2049141 [meta.py:__init__():40] meta init +2022-07-30 07:49:23,141 DEBUG HandlerThread:2049141 [meta.py:__init__():54] meta init done +2022-07-30 07:49:23,141 DEBUG HandlerThread:2049141 [meta.py:probe():214] probe +2022-07-30 07:49:23,142 DEBUG HandlerThread:2049141 [meta.py:_setup_git():204] setup git +2022-07-30 07:49:23,180 DEBUG HandlerThread:2049141 [meta.py:_setup_git():211] setup git done +2022-07-30 07:49:23,180 DEBUG HandlerThread:2049141 [meta.py:_save_code():92] save code +2022-07-30 07:49:23,193 DEBUG HandlerThread:2049141 [meta.py:_save_code():113] save code done +2022-07-30 07:49:23,193 DEBUG HandlerThread:2049141 [meta.py:_save_patches():130] save patches +2022-07-30 07:49:23,284 DEBUG HandlerThread:2049141 [meta.py:_save_patches():172] save patches done +2022-07-30 07:49:23,284 DEBUG HandlerThread:2049141 [meta.py:_save_pip():58] save pip +2022-07-30 07:49:23,284 DEBUG HandlerThread:2049141 [meta.py:_save_pip():72] save pip done +2022-07-30 07:49:23,284 DEBUG HandlerThread:2049141 [meta.py:probe():252] probe done +2022-07-30 07:49:23,287 DEBUG SenderThread:2049141 [sender.py:send():234] send: files +2022-07-30 07:49:23,287 INFO SenderThread:2049141 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-30 07:49:23,288 INFO SenderThread:2049141 [sender.py:_save_file():939] 
saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-30 07:49:23,288 INFO SenderThread:2049141 [sender.py:_save_file():939] saving file diff.patch with policy now +2022-07-30 07:49:23,294 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:49:23,297 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:49:23,768 INFO Thread-11 :2049141 [upload_job.py:push():137] Uploaded file /tmp/tmpqr2f7h1jwandb/c7b0qlvh-wandb-metadata.json +2022-07-30 07:49:23,776 INFO Thread-13 :2049141 [upload_job.py:push():137] Uploaded file /tmp/tmpqr2f7h1jwandb/1z8w735k-diff.patch +2022-07-30 07:49:23,863 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/diff.patch +2022-07-30 07:49:23,863 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/wandb-metadata.json +2022-07-30 07:49:23,863 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/code/run_flax_speech_recognition_ctc.py +2022-07-30 07:49:23,863 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:49:23,863 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/requirements.txt +2022-07-30 07:49:23,863 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/code +2022-07-30 07:49:23,979 INFO Thread-12 :2049141 [upload_job.py:push():137] Uploaded file /tmp/tmpqr2f7h1jwandb/3m9yt5mk-code/run_flax_speech_recognition_ctc.py 
+2022-07-30 07:49:25,864 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:49:27,864 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:49:29,865 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:49:31,866 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:49:37,869 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:49:38,432 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:49:38,432 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:49:39,870 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:49:51,223 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:49:51,875 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:49:53,571 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:49:53,571 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:49:53,876 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:50:03,880 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:50:05,881 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:50:07,882 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:50:08,710 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:50:08,710 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:50:21,295 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:50:22,889 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:50:23,847 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:50:23,848 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:50:39,000 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:50:39,000 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:50:51,366 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:50:54,167 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:50:54,168 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:51:05,909 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:07,909 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:09,354 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:51:09,354 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:51:09,910 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:11,911 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:13,912 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:15,913 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:17,914 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:19,915 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:21,435 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:51:21,916 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:23,917 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:24,523 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:51:24,523 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:51:26,918 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:28,919 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:30,920 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:32,921 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:34,922 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:36,923 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:38,924 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:39,670 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:51:39,671 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:51:40,929 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:42,930 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:44,931 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:46,932 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:48,932 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:50,934 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:51,510 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:51:52,938 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:54,842 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:51:54,843 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:51:54,939 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:56,940 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:51:58,941 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:00,942 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:02,943 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:04,944 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:06,945 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:08,946 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:09,982 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:52:09,983 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:52:10,947 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:12,948 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:14,949 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:16,950 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 
07:52:18,952 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:20,953 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:21,591 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:52:22,954 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:24,955 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:25,125 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:52:25,125 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:52:26,956 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:28,957 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:30,957 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:32,959 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:34,960 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 
07:52:36,961 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:38,962 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:40,264 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:52:40,265 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:52:40,963 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:42,964 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:44,965 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:46,968 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:48,970 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:50,969 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:51,681 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:52:52,970 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 
07:52:54,972 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:55,405 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:52:55,405 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:52:56,973 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:52:58,974 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:00,975 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:02,976 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:04,977 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:06,978 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:08,979 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:10,564 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:53:10,564 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:53:10,980 INFO Thread-8 :2049141 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:12,981 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:14,982 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:16,984 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:18,984 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:20,985 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:21,760 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:53:22,986 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:24,987 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:25,711 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:53:25,711 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:53:26,988 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:28,990 INFO Thread-8 :2049141 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:30,991 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:32,992 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:34,993 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:36,994 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:38,995 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:40,847 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:53:40,848 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:53:40,996 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:42,997 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:44,998 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:46,999 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:49,000 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:51,001 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:51,846 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:53:53,003 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:55,003 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:55,989 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:53:55,989 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:53:57,004 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:53:59,005 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:01,006 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:03,007 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:05,008 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:07,009 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:09,010 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:11,011 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:11,149 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:54:11,149 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:54:13,012 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:15,013 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:17,014 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:19,015 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:21,016 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:21,922 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:54:23,017 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:25,018 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:26,293 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:54:26,294 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:54:27,019 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:29,020 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:31,021 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:54:41,436 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:54:41,436 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:54:52,000 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:54:56,579 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:54:56,580 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:55:11,718 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:55:11,718 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:55:22,079 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:55:26,855 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 07:55:26,855 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:55:41,992 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:55:41,993 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:55:52,160 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:55:57,152 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:55:57,153 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:56:05,066 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:07,068 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:09,069 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:11,070 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:12,295 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:56:12,295 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:56:13,072 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:15,073 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:17,074 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:19,076 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:21,077 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:22,240 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:56:23,078 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:25,079 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:27,081 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:29,082 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:29,109 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:56:29,110 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:56:31,083 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:33,084 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:35,085 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:38,087 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:40,088 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:56:44,258 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:56:44,261 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:56:52,311 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:56:59,398 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:56:59,399 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:57:04,098 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:14,102 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:14,536 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:57:14,537 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:57:16,103 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:18,104 INFO Thread-8 
:2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:20,105 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:22,106 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:22,386 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:57:24,107 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:26,108 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:28,109 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:29,675 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:57:29,675 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:57:30,110 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:32,111 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:34,112 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:36,113 INFO Thread-8 
:2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:38,114 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:40,115 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:42,116 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:44,117 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:44,903 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:57:44,903 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:57:48,119 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:50,120 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:52,121 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:52,465 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:57:54,123 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:57:56,124 INFO Thread-8 
:2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:00,053 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:58:00,054 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:58:15,191 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:58:15,192 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:58:22,531 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:58:26,137 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:30,422 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:58:30,422 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:58:36,141 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:39,143 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:41,144 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:43,145 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:45,146 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:45,614 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:58:45,614 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:58:47,147 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:49,148 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:51,149 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:52,600 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:58:53,150 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:55,151 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:57,152 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:58:59,153 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:00,788 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:59:00,789 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:59:01,154 INFO Thread-8 :2049141 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:03,155 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:05,156 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:07,157 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:11,159 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:13,160 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:15,161 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:15,931 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:59:15,932 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:59:17,162 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:19,163 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:21,164 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:22,675 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:59:23,166 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:25,167 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:27,168 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:29,170 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:31,112 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:59:31,112 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:59:31,170 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:33,171 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:35,172 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:37,173 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:39,175 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:41,176 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:43,177 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:45,178 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:46,268 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 07:59:46,268 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 07:59:47,179 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:49,180 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:51,181 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:52,761 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 07:59:53,182 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:55,183 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:57,184 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 07:59:59,185 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:00:01,428 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:00:01,428 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:00:16,561 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:00:16,561 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:00:22,834 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:00:31,695 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:00:31,696 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:00:43,203 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:00:46,887 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:00:46,887 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:00:50,206 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:00:52,914 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:00:56,209 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:01:02,081 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 08:01:02,082 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:01:04,212 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:01:10,215 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:01:17,218 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:01:17,218 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:01:17,219 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:01:19,219 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:01:22,990 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:01:23,220 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:01:27,222 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:01:29,223 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:01:32,360 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:01:32,360 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:01:35,226 INFO 
Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:01:47,601 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:01:47,602 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:01:53,067 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:02:02,879 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:02:02,880 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:02:18,049 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:02:18,049 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:02:20,243 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:02:23,142 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:02:28,245 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:02:30,246 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:02:33,227 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:02:33,227 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:02:48,394 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:02:48,394 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: 
stop_status +2022-07-30 08:02:53,216 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:03:03,546 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:03:03,546 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:03:18,684 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:03:18,685 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:03:23,296 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:03:33,823 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:03:33,824 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:03:48,968 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:03:48,968 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:03:53,372 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:04:04,142 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:04:04,143 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:04:19,278 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:04:19,278 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:04:23,452 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:04:34,413 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:04:34,413 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:04:49,554 DEBUG HandlerThread:2049141 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:04:49,554 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:04:53,529 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:05:04,700 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:05:04,701 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:05:19,843 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:05:19,843 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:05:23,606 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:05:34,979 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:05:34,979 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:05:50,116 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:05:50,117 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:05:53,680 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:05:55,098 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:05:55,098 DEBUG SenderThread:2049141 [sender.py:send():234] send: telemetry +2022-07-30 08:05:55,099 DEBUG SenderThread:2049141 [sender.py:send():234] send: exit +2022-07-30 08:05:55,099 INFO SenderThread:2049141 [sender.py:send_exit():366] handling exit code: 1 +2022-07-30 08:05:55,101 INFO SenderThread:2049141 [sender.py:send_exit():368] handling runtime: 994 +2022-07-30 08:05:55,102 INFO SenderThread:2049141 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 08:05:55,102 INFO SenderThread:2049141 
[sender.py:send_exit():374] send defer +2022-07-30 08:05:55,102 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:55,103 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:05:55,103 INFO HandlerThread:2049141 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-30 08:05:55,103 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: defer +2022-07-30 08:05:55,103 INFO SenderThread:2049141 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-30 08:05:55,103 INFO SenderThread:2049141 [sender.py:transition_state():387] send defer: 1 +2022-07-30 08:05:55,104 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:05:55,104 INFO HandlerThread:2049141 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-30 08:05:55,183 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: defer +2022-07-30 08:05:55,183 INFO SenderThread:2049141 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-30 08:05:55,184 INFO SenderThread:2049141 [sender.py:transition_state():387] send defer: 2 +2022-07-30 08:05:55,184 DEBUG SenderThread:2049141 [sender.py:send():234] send: stats +2022-07-30 08:05:55,184 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:05:55,184 INFO HandlerThread:2049141 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-30 08:05:55,185 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: defer +2022-07-30 08:05:55,185 INFO SenderThread:2049141 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-30 08:05:55,185 INFO SenderThread:2049141 [sender.py:transition_state():387] send defer: 3 +2022-07-30 08:05:55,185 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:05:55,185 INFO HandlerThread:2049141 
[handler.py:handle_request_defer():147] handle defer: 3 +2022-07-30 08:05:55,185 DEBUG SenderThread:2049141 [sender.py:send():234] send: summary +2022-07-30 08:05:55,186 INFO SenderThread:2049141 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 08:05:55,186 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: defer +2022-07-30 08:05:55,186 INFO SenderThread:2049141 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-30 08:05:55,186 INFO SenderThread:2049141 [sender.py:transition_state():387] send defer: 4 +2022-07-30 08:05:55,186 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:05:55,186 INFO HandlerThread:2049141 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-30 08:05:55,186 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: defer +2022-07-30 08:05:55,187 INFO SenderThread:2049141 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-30 08:05:55,205 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:05:55,326 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/wandb-summary.json +2022-07-30 08:05:55,326 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:05:55,431 INFO SenderThread:2049141 [sender.py:transition_state():387] send defer: 5 +2022-07-30 08:05:55,431 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:55,432 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:05:55,432 INFO HandlerThread:2049141 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-30 08:05:55,432 DEBUG SenderThread:2049141 
[sender.py:send_request():248] send_request: defer +2022-07-30 08:05:55,432 INFO SenderThread:2049141 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-30 08:05:55,432 INFO SenderThread:2049141 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-30 08:05:55,533 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:05:56,326 INFO Thread-8 :2049141 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/config.yaml +2022-07-30 08:05:56,327 INFO SenderThread:2049141 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files +2022-07-30 08:05:56,328 INFO SenderThread:2049141 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/config.yaml config.yaml +2022-07-30 08:05:56,328 INFO SenderThread:2049141 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/diff.patch diff.patch +2022-07-30 08:05:56,328 INFO SenderThread:2049141 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/requirements.txt requirements.txt +2022-07-30 08:05:56,332 INFO SenderThread:2049141 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log output.log +2022-07-30 08:05:56,332 INFO SenderThread:2049141 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/wandb-summary.json wandb-summary.json +2022-07-30 08:05:56,335 INFO SenderThread:2049141 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/wandb-metadata.json wandb-metadata.json +2022-07-30 08:05:56,338 INFO SenderThread:2049141 [dir_watcher.py:finish():327] scan save: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-30 08:05:56,338 INFO SenderThread:2049141 [sender.py:transition_state():387] send defer: 6 +2022-07-30 08:05:56,338 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:56,343 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:05:56,343 INFO HandlerThread:2049141 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-30 08:05:56,343 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: defer +2022-07-30 08:05:56,344 INFO SenderThread:2049141 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-30 08:05:56,344 INFO SenderThread:2049141 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 08:05:56,444 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:05:56,444 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:56,546 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:05:56,546 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:56,647 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:05:56,648 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:56,749 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:05:56,749 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:56,832 INFO Thread-15 :2049141 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/requirements.txt +2022-07-30 08:05:56,836 INFO Thread-14 :2049141 
[upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/config.yaml +2022-07-30 08:05:56,851 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:05:56,851 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:56,856 INFO Thread-17 :2049141 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/wandb-summary.json +2022-07-30 08:05:56,953 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:05:56,953 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:57,037 INFO Thread-16 :2049141 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/files/output.log +2022-07-30 08:05:57,054 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:05:57,055 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:57,157 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:05:57,157 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:57,237 INFO Thread-7 :2049141 [sender.py:transition_state():387] send defer: 7 +2022-07-30 08:05:57,238 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:05:57,238 INFO HandlerThread:2049141 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-30 08:05:57,238 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: defer +2022-07-30 08:05:57,238 INFO SenderThread:2049141 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-30 08:05:57,258 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 
08:05:57,799 INFO SenderThread:2049141 [sender.py:transition_state():387] send defer: 8 +2022-07-30 08:05:57,800 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:57,800 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:05:57,800 INFO HandlerThread:2049141 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-30 08:05:57,801 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: defer +2022-07-30 08:05:57,801 INFO SenderThread:2049141 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-30 08:05:57,801 INFO SenderThread:2049141 [sender.py:transition_state():387] send defer: 9 +2022-07-30 08:05:57,802 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:05:57,802 INFO HandlerThread:2049141 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-30 08:05:57,802 DEBUG SenderThread:2049141 [sender.py:send():234] send: final +2022-07-30 08:05:57,802 DEBUG SenderThread:2049141 [sender.py:send():234] send: footer +2022-07-30 08:05:57,802 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: defer +2022-07-30 08:05:57,802 INFO SenderThread:2049141 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-30 08:05:57,901 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:05:57,901 DEBUG SenderThread:2049141 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:05:57,902 INFO SenderThread:2049141 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 08:05:58,160 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: get_summary +2022-07-30 08:05:58,161 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-30 08:05:58,161 DEBUG HandlerThread:2049141 [handler.py:handle_request():130] handle_request: 
shutdown +2022-07-30 08:05:58,162 INFO HandlerThread:2049141 [handler.py:finish():731] shutting down handler +2022-07-30 08:05:58,802 INFO WriterThread:2049141 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/run-12xoayks.wandb +2022-07-30 08:05:59,159 INFO SenderThread:2049141 [sender.py:finish():1070] shutting down sender +2022-07-30 08:05:59,159 INFO SenderThread:2049141 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 08:05:59,159 INFO SenderThread:2049141 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 08:05:59,163 INFO MainThread:2049141 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220730_074919-12xoayks/logs/debug.log b/wandb/run-20220730_074919-12xoayks/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..5b10edc5df5d44047496bbeef6e42098e1ff9808 --- /dev/null +++ b/wandb/run-20220730_074919-12xoayks/logs/debug.log @@ -0,0 +1,148 @@ +2022-07-30 07:49:19,734 INFO MainThread:2047809 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-30 07:49:19,734 INFO MainThread:2047809 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-30 07:49:19,734 INFO MainThread:2047809 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/logs/debug.log +2022-07-30 07:49:19,734 INFO MainThread:2047809 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_074919-12xoayks/logs/debug-internal.log +2022-07-30 07:49:19,734 INFO MainThread:2047809 [wandb_init.py:init():404] calling init triggers +2022-07-30 07:49:19,734 INFO MainThread:2047809 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-30 07:49:19,734 INFO MainThread:2047809 [wandb_init.py:init():460] starting backend +2022-07-30 07:49:19,734 INFO MainThread:2047809 
[backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-30 07:49:19,793 INFO MainThread:2047809 [backend.py:ensure_launched():216] starting backend process... +2022-07-30 07:49:19,838 INFO MainThread:2047809 [backend.py:ensure_launched():221] started backend process with pid: 2049141 +2022-07-30 07:49:19,840 INFO MainThread:2047809 [wandb_init.py:init():469] backend started and connected +2022-07-30 07:49:19,854 INFO MainThread:2047809 [wandb_init.py:init():533] updated telemetry +2022-07-30 07:49:19,967 INFO MainThread:2047809 [wandb_init.py:init():563] communicating current version +2022-07-30 07:49:20,691 INFO MainThread:2047809 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-30 07:49:20,692 INFO MainThread:2047809 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-30 07:49:20,859 INFO MainThread:2047809 [wandb_init.py:init():606] starting run threads in backend +2022-07-30 07:49:23,291 INFO MainThread:2047809 [wandb_run.py:_console_start():1810] atexit reg +2022-07-30 07:49:23,292 INFO MainThread:2047809 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-30 07:49:23,292 INFO MainThread:2047809 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-30 07:49:23,295 INFO MainThread:2047809 [wandb_run.py:_redirect():1745] Redirects installed. 
+2022-07-30 07:49:23,295 INFO MainThread:2047809 [wandb_init.py:init():633] run started, returning control to user process +2022-07-30 08:05:52,577 INFO MainThread:2047809 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-30 08:05:52,582 INFO MainThread:2047809 [wandb_run.py:_restore():1752] restore +2022-07-30 08:05:55,103 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 74317 +} + +2022-07-30 08:05:55,432 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 74317 +} + +2022-07-30 08:05:56,343 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 386444 +} + +2022-07-30 08:05:56,445 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 386444 +} + +2022-07-30 08:05:56,547 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 386444 + total_bytes: 386444 +} + +2022-07-30 08:05:56,648 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 386444 + total_bytes: 386444 +} + +2022-07-30 08:05:56,750 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 386444 + total_bytes: 386444 +} + +2022-07-30 08:05:56,852 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + 
other_count: 1 +} +pusher_stats { + uploaded_bytes: 386444 + total_bytes: 386444 +} + +2022-07-30 08:05:56,954 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 386444 + total_bytes: 386444 +} + +2022-07-30 08:05:57,056 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 386444 + total_bytes: 386444 +} + +2022-07-30 08:05:57,158 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 386444 + total_bytes: 386444 +} + +2022-07-30 08:05:57,800 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 386444 + total_bytes: 386444 +} + +2022-07-30 08:05:58,159 INFO MainThread:2047809 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 386444 + total_bytes: 386444 +} +local_info { +} + +2022-07-30 08:05:59,740 INFO MainThread:2047809 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220730_074919-12xoayks/run-12xoayks.wandb b/wandb/run-20220730_074919-12xoayks/run-12xoayks.wandb new file mode 100644 index 0000000000000000000000000000000000000000..c04aea630b2c69458564bd66ae80c86cd29a6a18 --- /dev/null +++ b/wandb/run-20220730_074919-12xoayks/run-12xoayks.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a022e831647d19c663a7f103a01c4beec1d2719db4226c57beea36e1706c209e +size 409024 diff --git a/wandb/run-20220730_081215-1j14rrmn/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220730_081215-1j14rrmn/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 
index 0000000000000000000000000000000000000000..d846a57f2375127bc169f9735151ad564083efac --- /dev/null +++ b/wandb/run-20220730_081215-1j14rrmn/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1605 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220730_081215-1j14rrmn/files/config.yaml b/wandb/run-20220730_081215-1j14rrmn/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0ae020ab8e073f88cec1f213742de06f10904bb2 --- /dev/null +++ b/wandb/run-20220730_081215-1j14rrmn/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659168735 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220730_081215-1j14rrmn/files/diff.patch b/wandb/run-20220730_081215-1j14rrmn/files/diff.patch new file mode 100644 index 0000000000000000000000000000000000000000..39b46e1762f7e4b4f330f4ff487b597cc5496642 --- /dev/null +++ b/wandb/run-20220730_081215-1j14rrmn/files/diff.patch @@ -0,0 +1,10 @@ +diff --git a/.gitattributes b/.gitattributes +index 755356a..f0eef4b 100644 +--- a/.gitattributes ++++ b/.gitattributes +@@ -29,3 +29,4 @@ 
saved_model/**/* filter=lfs diff=lfs merge=lfs -text + *.zip filter=lfs diff=lfs merge=lfs -text + *.zstandard filter=lfs diff=lfs merge=lfs -text + *tfevents* filter=lfs diff=lfs merge=lfs -text ++*.wandb filter=lfs diff=lfs merge=lfs -text +\ No newline at end of file diff --git a/wandb/run-20220730_081215-1j14rrmn/files/output.log b/wandb/run-20220730_081215-1j14rrmn/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..6e45f5eac8626ff8ac6383af49a5c087f5ce3f26 --- /dev/null +++ b/wandb/run-20220730_081215-1j14rrmn/files/output.log @@ -0,0 +1,1569 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul30_08-12-12_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, 
+logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=24, +per_device_train_batch_size=24, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 77.69it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 446.17it/s] 
+WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + 
"contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at 
/home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_hid', 'bias'), ('project_q', 'kernel'), ('quantizer', 'weight_proj', 'bias'), ('project_hid', 'kernel'), ('quantizer', 'weight_proj', 'kernel'), ('project_q', 'bias'), ('quantizer', 'codevectors')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
+Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'kernel'), ('lm_head', 'bias')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 9143.24ex/s] +removing punctuation from train split #1: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 9051.11ex/s] +removing punctuation from train split #2: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8869.01ex/s] +removing punctuation from train split #3: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8515.83ex/s] +removing punctuation from train split #5: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8734.38ex/s] +removing punctuation 
from train split #4: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8108.33ex/s] +removing punctuation from train split #7: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8402.66ex/s] +removing punctuation from train split #8: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8749.77ex/s] +removing punctuation from train split #4: 95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 9005/9523 [00:01<00:00, 7703.37ex/s] +removing punctuation from train split #6: 76%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 7204/9523 [00:00<00:00, 7350.85ex/s] +removing punctuation from train split #8: 77%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 7342/9523 [00:00<00:00, 8981.46ex/s] +removing punctuation from train split #6: 85%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8066/9523 [00:01<00:00, 7711.07ex/s] +removing punctuation from train split #6: 
94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8933/9523 [00:01<00:00, 7986.69ex/s] +removing punctuation from train split #7: 94%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8972/9523 [00:01<00:00, 7819.51ex/s] +removing punctuation from train split #8: 96%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 9104/9523 [00:01<00:00, 8332.99ex/s] +removing punctuation from train split #9: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8148/9523 [00:00<00:00, 9153.93ex/s] +removing punctuation from train split #10: 64%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 6056/9523 [00:00<00:00, 8551.74ex/s] +removing punctuation from train split #9: 95%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9070/9523 [00:01<00:00, 9172.58ex/s] +removing punctuation from train split #12: 35%|████████████████████████████████████████████████████████████████████▍ | 3326/9522 [00:00<00:00, 8433.42ex/s] +removing punctuation from train split #13: 35%|████████████████████████████████████████████████████████████████████ | 3304/9522 [00:00<00:00, 8383.23ex/s] +removing punctuation from train split #12: 44%|██████████████████████████████████████████████████████████████████████████████████████▏ | 4190/9522 [00:00<00:00, 
8504.65ex/s] +removing punctuation from train split #15: 34%|███████████████████████████████████████████████████████████████████▍ | 3277/9522 [00:00<00:00, 8313.68ex/s] +removing punctuation from train split #16: 26%|██████████████████████████████████████████████████▌ | 2455/9522 [00:00<00:00, 8247.38ex/s] +removing punctuation from train split #17: 16%|████████████████████████████████▎ | 1567/9522 [00:00<00:01, 7901.22ex/s] +removing punctuation from train split #18: 8%|████████████████▏ | 781/9522 [00:00<00:01, 7806.36ex/s] +removing punctuation from train split #17: 25%|█████████████████████████████████████████████████▍ | 2402/9522 [00:00<00:00, 8104.35ex/s] +removing punctuation from train split #18: 17%|█████████████████████████████████▌ | 1632/9522 [00:00<00:00, 8216.82ex/s] +removing punctuation from train split #25: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8188.32ex/s] +removing punctuation from train split #28: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8679.44ex/s] +removing punctuation from train split #29: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8830.52ex/s] +removing punctuation from train split #31: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8859.71ex/s] +removing punctuation from train split #30: 
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8310.85ex/s] +removing punctuation from train split #16: 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 9404/9522 [00:01<00:00, 8678.97ex/s] +removing punctuation from train split #18: 82%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 7777/9522 [00:00<00:00, 8162.33ex/s] +removing punctuation from train split #17: 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9386/9522 [00:01<00:00, 8094.38ex/s] +removing punctuation from train split #18: 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8671/9522 [00:01<00:00, 8389.25ex/s] +removing punctuation from train split #19: 87%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 8322/9522 [00:01<00:00, 8707.34ex/s] +removing punctuation from train split #20: 78%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 7386/9522 [00:00<00:00, 7844.57ex/s] +removing punctuation from train split #19: 
97%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9193/9522 [00:01<00:00, 7908.47ex/s] +removing punctuation from train split #21: 80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 7626/9522 [00:00<00:00, 8588.83ex/s] +removing punctuation from train split #20: 96%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9141/9522 [00:01<00:00, 8316.59ex/s] +removing punctuation from train split #22: 81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7706/9522 [00:00<00:00, 8578.86ex/s] +removing punctuation from train split #21: 98%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 9372/9522 [00:01<00:00, 8329.46ex/s] +removing punctuation from train split #22: 90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 8564/9522 [00:01<00:00, 8072.87ex/s] +removing punctuation from train split #22: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 9438/9522 [00:01<00:00, 8267.35ex/s] +removing punctuation from train split #23: 
80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7614/9522 [00:00<00:00, 7793.87ex/s] +removing punctuation from train split #23: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9432/9522 [00:01<00:00, 8451.31ex/s] +removing punctuation from train split #24: 84%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8036/9522 [00:01<00:00, 8343.29ex/s] +removing punctuation from train split #24: 94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8961/9522 [00:01<00:00, 8612.13ex/s] +removing punctuation from train split #25: 87%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8314/9522 [00:01<00:00, 9221.98ex/s] +removing punctuation from train split #26: 92%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8789/9522 [00:01<00:00, 8678.32ex/s] +removing punctuation from train split #25: 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 9305/9522 [00:01<00:00, 9430.23ex/s] +removing punctuation from train split #28: 
82%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 7820/9522 [00:00<00:00, 8492.68ex/s] +removing punctuation from train split #28: 92%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8773/9522 [00:01<00:00, 8799.55ex/s] +removing punctuation from train split #30: 63%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 5956/9522 [00:00<00:00, 8623.44ex/s] +removing punctuation from train split #30: 72%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 6830/9522 [00:00<00:00, 8251.44ex/s] +removing punctuation from train split #31: 75%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 7145/9522 [00:00<00:00, 9221.92ex/s] +removing punctuation from train split #29: 84%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8016/9522 [00:00<00:00, 8796.98ex/s] +removing punctuation from train split #29: 94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8982/9522 [00:01<00:00, 9053.42ex/s] +removing punctuation from train split #30: 82%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 7784/9522 [00:00<00:00, 8632.40ex/s] +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00028_of_00032.arrow8718/9522 [00:01<00:00, 8843.18ex/s] +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00031_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00000_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00001_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00002_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00003_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached 
processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00004_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00005_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00006_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00007_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00008_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00009_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00010_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00011_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed 
dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00012_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00013_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00014_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00015_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00016_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00017_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00018_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00019_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00020_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00021_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00022_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00023_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00024_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00025_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00026_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00027_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00028_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00031_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00000_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00001_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00002_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00003_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00004_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00005_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00006_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00007_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00008_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00009_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00010_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00011_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00012_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00013_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00014_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00015_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00016_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00017_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00018_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00019_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00020_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00021_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00022_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00023_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00024_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00025_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00026_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00027_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00028_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00031_of_00032.arrow +preprocess dataset #0: 0%|▉ | 41/9497 [00:01<03:33, 44.27ex/s] +preprocess dataset #1: 0%|▌ | 25/9497 [00:01<04:40, 33.76ex/s] +preprocess dataset #2: 1%|█▋ | 72/9497 [00:02<03:41, 42.52ex/s] +preprocess dataset #3: 0%|▉ | 38/9497 [00:01<04:16, 36.94ex/s] +preprocess dataset #4: 0%|▏ | 6/9497 [00:00<18:47, 8.42ex/s] +preprocess dataset #5: 1%|█▋ | 74/9497 [00:02<03:17, 47.62ex/s] +preprocess dataset #6: 0%|▌ | 22/9497 [00:01<06:20, 24.88ex/s] +preprocess dataset #7: 1%|██ | 89/9497 [00:02<03:26, 45.56ex/s] +preprocess dataset #8: 0%|▋ | 31/9497 [00:01<04:49, 32.66ex/s] +preprocess dataset #9: 0%| | 1/9497 [00:00<2:13:35, 1.18ex/s] +preprocess dataset #10: 0%|█ | 45/9497 [00:01<04:03, 38.89ex/s] +preprocess dataset #11: 1%|██▏ | 97/9496 [00:03<03:44, 41.87ex/s] +preprocess dataset #12: 1%|█▏ | 52/9496 [00:01<03:53, 40.40ex/s] +preprocess dataset #13: 0%|▎ | 16/9496 [00:01<07:08, 22.12ex/s] +preprocess dataset #14: 1%|█▍ | 64/9496 [00:02<04:40, 33.58ex/s] +preprocess dataset #15: 0%|▌ | 23/9496 [00:01<05:42, 27.64ex/s] +preprocess dataset #16: 1%|█▉ | 83/9496 
[00:02<04:10, 37.51ex/s] +preprocess dataset #17: 0%|▊ | 37/9496 [00:01<04:49, 32.68ex/s] +preprocess dataset #18: 1%|█▎ | 59/9496 [00:02<04:30, 34.90ex/s] +preprocess dataset #19: 1%|██▏ | 96/9496 [00:03<04:49, 32.48ex/s] +preprocess dataset #20: 0%|▊ | 33/9496 [00:01<05:26, 28.94ex/s] +preprocess dataset #21: 0%|▏ | 8/9496 [00:01<13:50, 11.42ex/s] +preprocess dataset #22: 1%|█▋ | 73/9496 [00:03<04:56, 31.74ex/s] +preprocess dataset #23: 0%|▌ | 26/9496 [00:01<06:14, 25.28ex/s] +preprocess dataset #24: 1%|█▌ | 70/9496 [00:03<04:36, 34.10ex/s] +preprocess dataset #25: 0%|▌ | 25/9496 [00:01<06:52, 22.97ex/s] +preprocess dataset #25: 1%|█▋ | 71/9496 [00:03<06:22, 24.66ex/s] +preprocess dataset #26: 1%|█▏ | 49/9496 [00:02<06:26, 24.43ex/s] +preprocess dataset #27: 0%|▍ | 21/9496 [00:01<07:30, 21.03ex/s] +preprocess dataset #27: 1%|█▌ | 68/9496 [00:03<06:41, 23.49ex/s] +preprocess dataset #28: 0%|▉ | 38/9496 [00:02<06:23, 24.63ex/s] +preprocess dataset #29: 1%|█▎ | 56/9496 [00:02<05:18, 29.62ex/s] +preprocess dataset #30: 0%|▋ | 31/9496 [00:01<05:50, 26.99ex/s] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +preprocess dataset #20: 44%|███████████████████████████████████████████████████████████████████████████████████████████████▉ | 4198/9496 [02:17<02:30, 35.14ex/s] +preprocess dataset #21: 44%|██████████████████████████████████████████████████████████████████████████████████████████████▋ | 4142/9496 [02:16<02:32, 35.17ex/s] +preprocess dataset #22: 43%|██████████████████████████████████████████████████████████████████████████████████████████████ | 4114/9496 [02:16<02:35, 34.60ex/s] +preprocess dataset #23: 43%|█████████████████████████████████████████████████████████████████████████████████████████████ | 4071/9496 [02:14<02:43, 33.23ex/s] +preprocess dataset #24: 43%|████████████████████████████████████████████████████████████████████████████████████████████▎ | 4038/9496 [02:13<03:02, 
29.89ex/s] +preprocess dataset #25: 42%|███████████████████████████████████████████████████████████████████████████████████████████▏ | 3991/9496 [02:12<02:29, 36.82ex/s] +preprocess dataset #26: 42%|██████████████████████████████████████████████████████████████████████████████████████████▍ | 3960/9496 [02:11<02:46, 33.29ex/s] +preprocess dataset #27: 42%|███████████████████████████████████████████████████████████████████████████████████████████ | 3983/9496 [02:10<02:47, 32.97ex/s] +preprocess dataset #28: 41%|██████████████████████████████████████████████████████████████████████████████████████████ | 3939/9496 [02:09<02:46, 33.36ex/s] +preprocess dataset #29: 42%|██████████████████████████████████████████████████████████████████████████████████████████▎ | 3954/9496 [02:07<02:52, 32.15ex/s] +preprocess dataset #30: 41%|████████████████████████████████████████████████████████████████████████████████████████▌ | 3876/9496 [02:06<02:46, 33.71ex/s] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +preprocess dataset #4: 90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8501/9497 [04:33<00:30, 32.43ex/s] +preprocess dataset #5: 88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8402/9497 [04:32<00:30, 36.28ex/s] +preprocess dataset #6: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8354/9497 [04:31<00:35, 32.64ex/s] +preprocess dataset #7: 
88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8331/9497 [04:30<00:34, 33.47ex/s] +preprocess dataset #8: 83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 7920/9497 [04:29<00:43, 36.43ex/s] +preprocess dataset #9: 84%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7996/9497 [04:28<00:39, 38.44ex/s] +preprocess dataset #10: 87%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8231/9497 [04:28<00:36, 34.90ex/s] +preprocess dataset #11: 86%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8129/9496 [04:27<00:35, 38.42ex/s] +preprocess dataset #12: 83%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 7916/9496 [04:26<00:48, 32.88ex/s] +preprocess dataset #13: 86%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8159/9496 [04:25<00:44, 29.78ex/s] +preprocess dataset #14: 83%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7900/9496 [04:24<00:47, 33.59ex/s] 
+preprocess dataset #15: 84%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 7963/9496 [04:23<00:38, 40.25ex/s] +preprocess dataset #16: 84%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 7958/9496 [04:22<00:45, 33.60ex/s] +preprocess dataset #17: 84%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7944/9496 [04:21<00:53, 28.81ex/s] +preprocess dataset #18: 84%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7997/9496 [04:20<00:39, 37.64ex/s] +preprocess dataset #19: 85%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8078/9496 [04:19<00:43, 32.36ex/s] +preprocess dataset #20: 83%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 7890/9496 [04:17<00:50, 31.53ex/s] +preprocess dataset #21: 82%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 7804/9496 [04:17<00:51, 32.97ex/s] +preprocess dataset #22: 82%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 7803/9496 [04:16<00:48, 35.08ex/s] 
+preprocess dataset #23: 82%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7777/9496 [04:15<00:51, 33.43ex/s] +preprocess dataset #24: 81%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 7671/9496 [04:14<00:59, 30.73ex/s] +preprocess dataset #25: 80%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7644/9496 [04:13<00:57, 32.44ex/s] +preprocess dataset #26: 80%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 7618/9496 [04:11<00:58, 32.18ex/s] +preprocess dataset #27: 81%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7732/9496 [04:10<00:54, 32.37ex/s] +preprocess dataset #28: 80%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 7630/9496 [04:09<01:03, 29.32ex/s] +preprocess dataset #29: 80%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 7634/9496 [04:08<00:58, 31.57ex/s] +preprocess dataset #30: 80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 7571/9496 [04:07<01:00, 31.79ex/s] + + + + + + + + + + + + + + + +preprocess dataset #29: 
91%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8616/9496 [04:40<00:24, 35.53ex/s] +preprocess dataset #30: 90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8542/9496 [04:39<00:26, 35.90ex/s] +preprocess dataset #29: 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 8676/9496 [04:42<00:27, 30.04ex/s] +preprocess dataset #30: 91%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8599/9496 [04:41<00:27, 33.11ex/s] +preprocess dataset #30: 91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8661/9496 [04:43<00:33, 24.93ex/s] +preprocess dataset #29: 93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8814/9496 [04:46<00:17, 39.21ex/s] +preprocess dataset #30: 92%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8730/9496 [04:45<00:21, 36.40ex/s] +preprocess dataset #29: 
94%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8879/9496 [04:48<00:19, 31.25ex/s] +preprocess dataset #30: 93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8799/9496 [04:47<00:21, 32.97ex/s] + +preprocess dataset #30: 94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8931/9496 [04:51<00:16, 33.46ex/s] +preprocess dataset #29: 96%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 9080/9496 [04:54<00:10, 39.28ex/s] +preprocess dataset #30: 95%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8995/9496 [04:53<00:12, 39.74ex/s] +preprocess dataset #28: 96%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 9122/9496 [04:57<00:10, 36.51ex/s] +preprocess dataset #29: 96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9154/9496 [04:56<00:07, 48.51ex/s] +preprocess dataset #30: 
95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 9053/9496 [04:55<00:12, 34.43ex/s] +preprocess dataset #25: 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 9267/9496 [05:03<00:04, 54.36ex/s] +preprocess dataset #26: 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 9272/9496 [05:02<00:03, 58.50ex/s] +preprocess dataset #27: 98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9352/9496 [05:00<00:02, 56.13ex/s] +preprocess dataset #28: 97%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 9238/9496 [05:00<00:04, 61.89ex/s] +preprocess dataset #29: 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9264/9496 [04:58<00:03, 60.60ex/s] +preprocess dataset #30: 97%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 9166/9496 [04:57<00:05, 60.97ex/s] +preprocess dataset #30: 
98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 9270/9496 [04:59<00:03, 59.51ex/s] +preprocess dataset #28: 99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9441/9496 [05:03<00:00, 62.57ex/s] +preprocess dataset #28: 99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 9448/9496 [05:03<00:00, 58.73ex/s] +preprocess dataset #29: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊| 9488/9496 [05:02<00:00, 59.58ex/s] +preprocess dataset #30: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 9401/9496 [05:01<00:01, 57.34ex/s] +preprocess dataset #28: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊| 9490/9496 [05:04<00:00, 55.82ex/s] +preprocess dataset #30: 99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 9423/9496 [05:01<00:01, 65.02ex/s] +preprocess dataset #30: 
99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 9446/9496 [05:02<00:00, 59.45ex/s] +preprocess dataset #30: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊| 9490/9496 [05:02<00:00, 66.83ex/s] +preprocess dataset #31: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 9453/9496 [05:01<00:00, 73.43ex/s] +preprocess dataset #3: 0%| | 0/1267 [00:00 to the vocabulary +Adding to the vocabulary +2022-07-30 08:24:24.654643: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2022-07-30 08:24:24.654682: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303) +/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +WARNING:huggingface_hub.repository:/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +run_flax_speech_recognition_ctc.py:333: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. 
+ return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) +INFO:__main__:***** Running training ***** +INFO:__main__: Num examples = 302693 +INFO:__main__: Num Epochs = 40 +INFO:__main__: Instantaneous batch size per device = 24 +INFO:__main__: Num gradient accumulation steps = 1 +INFO:__main__: Total train batch size (w. parallel & distributed) = 192 +INFO:__main__: Total optimization steps = 63040 +INFO:__main__: Gradient checkpointing: True +INFO:__main__: Use scan: False +INFO:__main__: Fuse matmuls: False +Epoch ... (1/40): 0%| | 0/40 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) + File "/data/flax/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 162, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2158, in cache_miss + out_tree, out_flat = f_pmapped_(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2034, in pmap_f + out = pxla.xla_pmap( + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2022, in bind + return map_bind(self, fun, *args, **params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2054, in map_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2025, in process + return trace.process_map(self, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 687, in process_call + return primitive.impl(f, *tracers, **params) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 829, in xla_pmap_impl + compiled_fun, fingerprint = parallel_callable( + File "/data/flax/lib/python3.8/site-packages/jax/linear_util.py", line 295, in memoized_fun + ans = call(fun, *args) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", 
line 860, in parallel_callable + pmap_executable = pmap_computation.compile() + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1137, in compile + self._executable = PmapExecutable.from_hlo(self._hlo, **self.compile_args) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1285, in from_hlo + compiled = dispatch.compile_or_get_cached( + File "/data/flax/lib/python3.8/site-packages/jax/_src/dispatch.py", line 899, in compile_or_get_cached + return backend_compile(backend, computation, compile_options, host_callbacks) + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/dispatch.py", line 843, in backend_compile + return backend.compile(built_c, compile_options=options) +jax._src.traceback_util.UnfilteredStackTrace: jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: XLA:TPU compile permanent error. Ran out of memory in memory space hbm. Used 20.24G of 15.48G hbm. Exceeded hbm capacity by 4.75G. +Total hbm usage >= 20.75G: + reserved 530.00M + program 9.42G + arguments 10.82G +Output size 10.76G; shares 10.76G with arguments. +Program hbm requirement 9.42G: + global 196.0K + scoped 72.08M + HLO temp 9.35G (98.7% utilization: Unpadded (8.88G) Padded (9.00G), 3.7% fragmentation (354.84M)) + Largest program allocations in hbm: + 1. 
Size: 1.46G + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/42/remat(core_fn)/42/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: f32[24,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 1.43G + Extra memory due to padding: 38.08M (1.0x expansion) + XLA label: fusion.24028 = fusion(fusion.14571, bitcast.7348, bitcast.7346), kind=kOutput, calls=fused_computation.18801 + Allocation type: HLO temp + ========================== + 2. Size: 1.46G + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/42/remat(core_fn)/42/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: f32[24,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 1.43G + Extra memory due to padding: 38.08M (1.0x expansion) + XLA label: fusion.188.remat5 = fusion(bitcast.7348, bitcast.7346, fusion.14571), kind=kOutput, calls=fused_computation.184.clone.clone.clone.clone.clone + Allocation type: HLO temp + ========================== + 3. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/43/remat(core_fn)/43/layer_norm/add_any" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.7200 = fusion(get-tuple-element.9849, get-tuple-element.17172, get-tuple-element.10141, get-tuple-element.17171, ...(+3)), kind=kLoop, calls=fused_computation.6644 + Allocation type: HLO temp + ========================== + 4. 
Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/36/36/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20050 = fusion(fusion.6760.remat, get-tuple-element.13727, fusion.1609, bitcast.11036), kind=kOutput, calls=fused_computation.18616 + Allocation type: HLO temp + ========================== + 5. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/34/34/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20054 = fusion(fusion.6764.remat, get-tuple-element.13695, fusion.1611, bitcast.11040), kind=kOutput, calls=fused_computation.18620 + Allocation type: HLO temp + ========================== + 6. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/41/41/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20040 = fusion(fusion.6750.remat, get-tuple-element.13823, fusion.1604, bitcast.11026), kind=kOutput, calls=fused_computation.18606 + Allocation type: HLO temp + ========================== + 7. 
Size: 117.07M + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: copy.17574 = copy(bitcast.14704) + Allocation type: HLO temp + ========================== + 8. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/layer_norm/reduce_sum[axes=(2,)]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20124 = fusion(fusion.6836, copy.17574, get-tuple-element.14005, fusion.8741, ...(+1)), kind=kLoop, calls=fused_computation.18690 + Allocation type: HLO temp + ========================== + 9. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/40/40/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20042 = fusion(fusion.6752.remat, get-tuple-element.13807, fusion.1605, bitcast.11028), kind=kOutput, calls=fused_computation.18608 + Allocation type: HLO temp + ========================== + 10. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20122 = fusion(get-tuple-element.11239, get-tuple-element.13247, fusion.1645, bitcast.11108), kind=kOutput, calls=fused_computation.18688 + Allocation type: HLO temp + ========================== + 11. 
Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/1/1/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20120 = fusion(get-tuple-element.11243, get-tuple-element.13263, fusion.1644, bitcast.11106), kind=kOutput, calls=fused_computation.18686 + Allocation type: HLO temp + ========================== + 12. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/2/2/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20118 = fusion(get-tuple-element.11247, get-tuple-element.13439, fusion.1643, bitcast.11104), kind=kOutput, calls=fused_computation.18684 + Allocation type: HLO temp + ========================== + 13. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/3/3/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20116 = fusion(get-tuple-element.11251, get-tuple-element.13615, fusion.1642, bitcast.11102), kind=kOutput, calls=fused_computation.18682 + Allocation type: HLO temp + ========================== + 14. 
Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/4/4/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20114 = fusion(get-tuple-element.11255, get-tuple-element.13791, fusion.1641, bitcast.11100), kind=kOutput, calls=fused_computation.18680 + Allocation type: HLO temp + ========================== + 15. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/5/5/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20112 = fusion(get-tuple-element.11259, get-tuple-element.13935, fusion.1640, bitcast.11098), kind=kOutput, calls=fused_computation.18678 + Allocation type: HLO temp + ========================== + 16. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/6/6/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20110 = fusion(get-tuple-element.11263, get-tuple-element.13951, fusion.1639, bitcast.11096), kind=kOutput, calls=fused_computation.18676 + Allocation type: HLO temp + ========================== + 17. 
Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/7/7/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20108 = fusion(get-tuple-element.11267, get-tuple-element.13967, fusion.1638, bitcast.11094), kind=kOutput, calls=fused_computation.18674 + Allocation type: HLO temp + ========================== + 18. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/8/8/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20106 = fusion(get-tuple-element.11271, get-tuple-element.13983, fusion.1637, bitcast.11092), kind=kOutput, calls=fused_computation.18672 + Allocation type: HLO temp + ========================== + 19. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/9/9/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20104 = fusion(get-tuple-element.11275, get-tuple-element.13999, fusion.1636, bitcast.11090), kind=kOutput, calls=fused_computation.18670 + Allocation type: HLO temp + ========================== + 20. 
Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/10/10/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20102 = fusion(get-tuple-element.11279, get-tuple-element.13279, fusion.1635, bitcast.11088), kind=kOutput, calls=fused_computation.18668 + Allocation type: HLO temp + ========================== +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1605, in + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) +jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: XLA:TPU compile permanent error. Ran out of memory in memory space hbm. Used 20.24G of 15.48G hbm. Exceeded hbm capacity by 4.75G. +Total hbm usage >= 20.75G: + reserved 530.00M + program 9.42G + arguments 10.82G +Output size 10.76G; shares 10.76G with arguments. +Program hbm requirement 9.42G: + global 196.0K + scoped 72.08M + HLO temp 9.35G (98.7% utilization: Unpadded (8.88G) Padded (9.00G), 3.7% fragmentation (354.84M)) + Largest program allocations in hbm: + 1. 
Size: 1.46G + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/42/remat(core_fn)/42/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: f32[24,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 1.43G + Extra memory due to padding: 38.08M (1.0x expansion) + XLA label: fusion.24028 = fusion(fusion.14571, bitcast.7348, bitcast.7346), kind=kOutput, calls=fused_computation.18801 + Allocation type: HLO temp + ========================== + 2. Size: 1.46G + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/42/remat(core_fn)/42/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: f32[24,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 1.43G + Extra memory due to padding: 38.08M (1.0x expansion) + XLA label: fusion.188.remat5 = fusion(bitcast.7348, bitcast.7346, fusion.14571), kind=kOutput, calls=fused_computation.184.clone.clone.clone.clone.clone + Allocation type: HLO temp + ========================== + 3. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/43/remat(core_fn)/43/layer_norm/add_any" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.7200 = fusion(get-tuple-element.9849, get-tuple-element.17172, get-tuple-element.10141, get-tuple-element.17171, ...(+3)), kind=kLoop, calls=fused_computation.6644 + Allocation type: HLO temp + ========================== + 4. 
Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/36/36/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20050 = fusion(fusion.6760.remat, get-tuple-element.13727, fusion.1609, bitcast.11036), kind=kOutput, calls=fused_computation.18616 + Allocation type: HLO temp + ========================== + 5. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/34/34/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20054 = fusion(fusion.6764.remat, get-tuple-element.13695, fusion.1611, bitcast.11040), kind=kOutput, calls=fused_computation.18620 + Allocation type: HLO temp + ========================== + 6. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/41/41/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20040 = fusion(fusion.6750.remat, get-tuple-element.13823, fusion.1604, bitcast.11026), kind=kOutput, calls=fused_computation.18606 + Allocation type: HLO temp + ========================== + 7. 
Size: 117.07M + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: copy.17574 = copy(bitcast.14704) + Allocation type: HLO temp + ========================== + 8. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/layer_norm/reduce_sum[axes=(2,)]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20124 = fusion(fusion.6836, copy.17574, get-tuple-element.14005, fusion.8741, ...(+1)), kind=kLoop, calls=fused_computation.18690 + Allocation type: HLO temp + ========================== + 9. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/40/40/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20042 = fusion(fusion.6752.remat, get-tuple-element.13807, fusion.1605, bitcast.11028), kind=kOutput, calls=fused_computation.18608 + Allocation type: HLO temp + ========================== + 10. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20122 = fusion(get-tuple-element.11239, get-tuple-element.13247, fusion.1645, bitcast.11108), kind=kOutput, calls=fused_computation.18688 + Allocation type: HLO temp + ========================== + 11. 
Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/1/1/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20120 = fusion(get-tuple-element.11243, get-tuple-element.13263, fusion.1644, bitcast.11106), kind=kOutput, calls=fused_computation.18686 + Allocation type: HLO temp + ========================== + 12. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/2/2/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20118 = fusion(get-tuple-element.11247, get-tuple-element.13439, fusion.1643, bitcast.11104), kind=kOutput, calls=fused_computation.18684 + Allocation type: HLO temp + ========================== + 13. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/3/3/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20116 = fusion(get-tuple-element.11251, get-tuple-element.13615, fusion.1642, bitcast.11102), kind=kOutput, calls=fused_computation.18682 + Allocation type: HLO temp + ========================== + 14. 
Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/4/4/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20114 = fusion(get-tuple-element.11255, get-tuple-element.13791, fusion.1641, bitcast.11100), kind=kOutput, calls=fused_computation.18680 + Allocation type: HLO temp + ========================== + 15. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/5/5/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20112 = fusion(get-tuple-element.11259, get-tuple-element.13935, fusion.1640, bitcast.11098), kind=kOutput, calls=fused_computation.18678 + Allocation type: HLO temp + ========================== + 16. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/6/6/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20110 = fusion(get-tuple-element.11263, get-tuple-element.13951, fusion.1639, bitcast.11096), kind=kOutput, calls=fused_computation.18676 + Allocation type: HLO temp + ========================== + 17. 
Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/7/7/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20108 = fusion(get-tuple-element.11267, get-tuple-element.13967, fusion.1638, bitcast.11094), kind=kOutput, calls=fused_computation.18674 + Allocation type: HLO temp + ========================== + 18. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/8/8/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20106 = fusion(get-tuple-element.11271, get-tuple-element.13983, fusion.1637, bitcast.11092), kind=kOutput, calls=fused_computation.18672 + Allocation type: HLO temp + ========================== + 19. Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/9/9/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20104 = fusion(get-tuple-element.11275, get-tuple-element.13999, fusion.1636, bitcast.11090), kind=kOutput, calls=fused_computation.18670 + Allocation type: HLO temp + ========================== + 20. 
Size: 117.07M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/10/10/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[24,999,1280]{2,0,1:T(8,128)} + Unpadded size: 117.07M + XLA label: fusion.20102 = fusion(get-tuple-element.11279, get-tuple-element.13279, fusion.1635, bitcast.11088), kind=kOutput, calls=fused_computation.18668 + Allocation type: HLO temp + ========================== \ No newline at end of file diff --git a/wandb/run-20220730_081215-1j14rrmn/files/requirements.txt b/wandb/run-20220730_081215-1j14rrmn/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ef78cbdea431c3d66bc5af51443394cb7955eab --- /dev/null +++ b/wandb/run-20220730_081215-1j14rrmn/files/requirements.txt @@ -0,0 +1,158 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +exceptiongroup==1.0.0rc8 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +hypothesis==6.53.0 +idna==3.3 
+importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +jiwer==2.3.0 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.4.0 +pygments==2.11.1 +pygtrie==2.5.0 +pyparsing==3.0.6 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No 
newline at end of file diff --git a/wandb/run-20220730_081215-1j14rrmn/files/wandb-metadata.json b/wandb/run-20220730_081215-1j14rrmn/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..fc26413d143e0a899730725a474903daca138a98 --- /dev/null +++ b/wandb/run-20220730_081215-1j14rrmn/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-30T08:12:19.247538", + "startedAt": "2022-07-30T08:12:15.884916", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=./", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=24", + "--per_device_eval_batch_size=24", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": 
"https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220730_081215-1j14rrmn/files/wandb-summary.json b/wandb/run-20220730_081215-1j14rrmn/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..68aedcb3f4d7db9af7ba584547258d149c6f582c --- /dev/null +++ b/wandb/run-20220730_081215-1j14rrmn/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 1000}} \ No newline at end of file diff --git a/wandb/run-20220730_081215-1j14rrmn/logs/debug-internal.log b/wandb/run-20220730_081215-1j14rrmn/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..8507fbbbf1042e54098fa61f87a455bfa1f723ed --- /dev/null +++ b/wandb/run-20220730_081215-1j14rrmn/logs/debug-internal.log @@ -0,0 +1,555 @@ +2022-07-30 08:12:16,797 INFO MainThread:1066697 [internal.py:wandb_internal():87] W&B internal server running at pid: 1066697, started at: 2022-07-30 08:12:16.796960 +2022-07-30 08:12:16,799 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: check_version +2022-07-30 08:12:16,799 INFO WriterThread:1066697 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/run-1j14rrmn.wandb +2022-07-30 08:12:16,800 DEBUG SenderThread:1066697 [sender.py:send():234] send: header +2022-07-30 08:12:16,800 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: check_version +2022-07-30 08:12:16,838 DEBUG SenderThread:1066697 [sender.py:send():234] send: run +2022-07-30 08:12:17,015 INFO SenderThread:1066697 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files +2022-07-30 08:12:17,015 INFO SenderThread:1066697 
[sender.py:_start_run_threads():804] run started: 1j14rrmn with start time 1659168735 +2022-07-30 08:12:17,015 DEBUG SenderThread:1066697 [sender.py:send():234] send: summary +2022-07-30 08:12:17,015 INFO SenderThread:1066697 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 08:12:17,015 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: run_start +2022-07-30 08:12:18,017 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/wandb-summary.json +2022-07-30 08:12:19,247 DEBUG HandlerThread:1066697 [meta.py:__init__():40] meta init +2022-07-30 08:12:19,247 DEBUG HandlerThread:1066697 [meta.py:__init__():54] meta init done +2022-07-30 08:12:19,247 DEBUG HandlerThread:1066697 [meta.py:probe():214] probe +2022-07-30 08:12:19,248 DEBUG HandlerThread:1066697 [meta.py:_setup_git():204] setup git +2022-07-30 08:12:19,287 DEBUG HandlerThread:1066697 [meta.py:_setup_git():211] setup git done +2022-07-30 08:12:19,287 DEBUG HandlerThread:1066697 [meta.py:_save_code():92] save code +2022-07-30 08:12:19,300 DEBUG HandlerThread:1066697 [meta.py:_save_code():113] save code done +2022-07-30 08:12:19,300 DEBUG HandlerThread:1066697 [meta.py:_save_patches():130] save patches +2022-07-30 08:12:19,374 DEBUG HandlerThread:1066697 [meta.py:_save_patches():172] save patches done +2022-07-30 08:12:19,374 DEBUG HandlerThread:1066697 [meta.py:_save_pip():58] save pip +2022-07-30 08:12:19,375 DEBUG HandlerThread:1066697 [meta.py:_save_pip():72] save pip done +2022-07-30 08:12:19,375 DEBUG HandlerThread:1066697 [meta.py:probe():252] probe done +2022-07-30 08:12:19,378 DEBUG SenderThread:1066697 [sender.py:send():234] send: files +2022-07-30 08:12:19,378 INFO SenderThread:1066697 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-30 08:12:19,379 INFO SenderThread:1066697 [sender.py:_save_file():939] 
saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-30 08:12:19,379 INFO SenderThread:1066697 [sender.py:_save_file():939] saving file diff.patch with policy now +2022-07-30 08:12:19,385 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:12:19,386 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:12:19,853 INFO Thread-13 :1066697 [upload_job.py:push():137] Uploaded file /tmp/tmpcv_bspjcwandb/11b76ghg-diff.patch +2022-07-30 08:12:19,923 INFO Thread-11 :1066697 [upload_job.py:push():137] Uploaded file /tmp/tmpcv_bspjcwandb/ttzg9uht-wandb-metadata.json +2022-07-30 08:12:20,021 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:12:20,021 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/requirements.txt +2022-07-30 08:12:20,021 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/diff.patch +2022-07-30 08:12:20,022 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/code/run_flax_speech_recognition_ctc.py +2022-07-30 08:12:20,022 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/wandb-metadata.json +2022-07-30 08:12:20,022 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/code +2022-07-30 08:12:20,058 INFO Thread-12 :1066697 [upload_job.py:push():137] Uploaded file /tmp/tmpcv_bspjcwandb/3u6u0iox-code/run_flax_speech_recognition_ctc.py 
+2022-07-30 08:12:22,022 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:12:24,023 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:12:26,024 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:12:28,025 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:12:34,029 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:12:34,522 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:12:34,523 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:12:36,030 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:12:47,324 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:12:48,036 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:12:49,682 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:12:49,683 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:12:50,037 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:13:01,042 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:13:03,044 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:13:04,886 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:13:04,886 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:13:17,051 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:13:17,400 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:13:19,052 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:13:20,032 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:13:20,033 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:13:35,188 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:13:35,188 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:13:47,472 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:13:50,379 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:13:50,380 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:14:01,072 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:04,073 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:05,655 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:14:05,655 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:14:06,074 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:08,075 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:10,076 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:12,077 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:14,078 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:16,080 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:17,538 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:14:18,081 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:20,082 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:20,979 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:14:20,980 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:14:22,083 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:24,084 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:26,085 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:28,086 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:30,090 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:32,091 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:34,092 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:36,093 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:36,163 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:14:36,164 DEBUG SenderThread:1066697 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 08:14:38,094 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:40,095 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:42,096 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:44,097 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:46,098 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:47,614 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:14:48,099 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:50,099 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:51,300 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:14:51,300 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:14:52,100 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:54,101 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:56,102 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:14:58,103 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:00,104 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:02,105 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:04,110 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:06,111 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:06,461 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:15:06,461 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:15:08,112 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:10,113 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:12,114 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 
08:15:14,115 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:16,116 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:17,684 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:15:18,117 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:20,118 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:21,596 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:15:21,597 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:15:22,120 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:24,121 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:27,122 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:29,123 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:31,124 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 
08:15:33,125 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:35,126 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:36,740 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:15:36,740 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:15:37,127 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:39,129 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:41,130 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:43,131 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:45,132 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:47,133 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:47,767 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:15:49,134 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 
08:15:51,135 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:51,880 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:15:51,881 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:15:53,136 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:55,137 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:57,138 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:15:59,140 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:01,141 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:03,142 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:05,144 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:07,069 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:16:07,070 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:16:07,145 INFO Thread-8 :1066697 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:09,146 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:11,147 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:13,148 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:15,149 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:17,150 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:17,847 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:16:19,151 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:21,152 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:22,207 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:16:22,207 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:16:23,153 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:25,155 INFO Thread-8 :1066697 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:27,156 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:29,157 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:31,158 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:33,159 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:35,160 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:37,161 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:37,354 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:16:37,355 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:16:39,162 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:41,163 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:43,164 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:45,165 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:47,167 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:47,929 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:16:49,168 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:51,169 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:52,496 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:16:52,496 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:16:53,172 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:55,173 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:57,174 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:16:59,176 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:01,177 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:03,178 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:05,179 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:07,180 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:07,679 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:17:07,680 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:17:09,181 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:11,182 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:13,183 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:15,184 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:17,186 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:18,012 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:17:19,187 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:21,189 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:22,820 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:17:22,820 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:17:23,190 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:25,191 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:27,192 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:29,193 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:31,194 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:33,195 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:35,196 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:37,197 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 
08:17:37,961 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:17:37,961 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:17:39,198 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:41,200 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:43,201 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:45,202 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:47,204 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:48,090 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:17:49,205 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:51,206 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:53,098 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:17:53,098 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:17:53,207 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:55,208 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:57,209 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:17:59,210 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:01,211 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:03,212 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:05,214 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:07,215 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:08,239 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:18:08,239 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:18:09,216 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:11,217 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 
08:18:13,218 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:15,219 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:17,220 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:18,173 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:18:19,221 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:21,222 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:23,223 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:23,381 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:18:23,381 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:18:25,224 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:27,225 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:29,226 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 
08:18:31,226 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:33,227 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:35,232 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:37,232 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:38,544 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:18:38,544 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:18:39,233 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:41,235 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:43,236 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:45,237 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:47,238 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:48,259 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 
08:18:49,240 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:51,241 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:53,243 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:53,684 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:18:53,684 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:18:55,244 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:18:58,245 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:00,247 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:02,248 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:04,249 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:06,250 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:08,251 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:08,830 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:19:08,831 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:19:10,252 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:12,253 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:14,254 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:16,255 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:18,256 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:18,345 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:19:20,259 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:22,260 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:23,968 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:19:23,969 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:19:24,262 INFO Thread-8 :1066697 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:26,263 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:28,264 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:30,265 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:32,266 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:19:39,127 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:19:39,127 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:19:48,419 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:19:54,261 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:19:54,261 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:20:09,418 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:20:09,418 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:20:10,283 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:12,284 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:14,285 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:16,286 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:18,287 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:18,491 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:20:20,288 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:22,289 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:24,290 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:24,561 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:20:24,562 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:20:26,291 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:28,293 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:30,294 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:32,295 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:34,296 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:36,297 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:38,298 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:39,702 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:20:39,702 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:20:40,299 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:48,302 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:48,565 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:20:50,304 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:52,305 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:54,306 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:20:54,850 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:20:54,850 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:20:56,307 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:21:09,990 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:21:09,991 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:21:18,635 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:21:25,124 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:21:25,125 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:21:35,324 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:21:37,325 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:21:39,326 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:21:40,258 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:21:40,258 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:21:41,327 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log 
+2022-07-30 08:21:43,328 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:21:45,330 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:21:47,331 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:21:48,706 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:21:49,332 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:21:51,333 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:21:53,339 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:21:55,340 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:21:55,396 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:21:55,396 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:21:57,341 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:21:59,341 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log 
+2022-07-30 08:22:01,343 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:22:03,344 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:22:05,345 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:22:09,347 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:22:10,550 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:22:10,551 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:22:11,348 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:22:13,349 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:22:15,350 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:22:17,351 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:22:18,781 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:22:19,352 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log 
+2022-07-30 08:22:21,353 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:22:23,355 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:22:25,356 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:22:25,921 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:22:25,922 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:22:27,357 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:22:41,062 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:22:41,063 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:22:48,862 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:22:56,209 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:22:56,209 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:22:57,372 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:23:11,349 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:23:11,350 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:23:18,936 DEBUG SenderThread:1066697 [sender.py:send():234] send: 
stats +2022-07-30 08:23:26,514 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:23:26,514 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:23:41,392 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:23:42,030 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:23:42,030 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:23:49,006 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:23:50,397 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:23:57,371 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:23:57,372 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:23:58,401 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:24:07,405 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:24:12,623 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:24:12,623 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:24:15,409 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:24:19,076 DEBUG SenderThread:1066697 [sender.py:send():234] 
send: stats +2022-07-30 08:24:23,413 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:24:25,414 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:24:27,844 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:24:27,844 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:24:31,417 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:24:33,418 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:24:35,419 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:24:42,423 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:24:43,414 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:24:43,415 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:24:49,142 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:24:58,843 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:24:58,844 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:25:13,994 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 08:25:13,994 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:25:19,203 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:25:23,441 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:25:29,148 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:25:29,148 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:25:31,445 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:25:33,446 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:25:44,309 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:25:44,309 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:25:49,266 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:25:59,453 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:25:59,454 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:26:14,589 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:26:14,590 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:26:19,331 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:26:29,723 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:26:29,724 DEBUG 
SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:26:44,856 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:26:44,857 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:26:49,394 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:26:59,989 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:26:59,990 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:27:15,123 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:27:15,123 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:27:19,460 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:27:30,257 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:27:30,258 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:27:45,403 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:27:45,403 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:27:49,529 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:28:00,538 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:28:00,539 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:28:15,669 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:28:15,670 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:28:19,599 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats 
+2022-07-30 08:28:30,810 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:28:30,810 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:28:45,949 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:28:45,949 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:28:49,674 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:28:56,539 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:28:57,453 DEBUG SenderThread:1066697 [sender.py:send():234] send: telemetry +2022-07-30 08:28:57,453 DEBUG SenderThread:1066697 [sender.py:send():234] send: exit +2022-07-30 08:28:57,453 INFO SenderThread:1066697 [sender.py:send_exit():366] handling exit code: 1 +2022-07-30 08:28:57,455 INFO SenderThread:1066697 [sender.py:send_exit():368] handling runtime: 1000 +2022-07-30 08:28:57,455 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:57,456 INFO SenderThread:1066697 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 08:28:57,457 INFO SenderThread:1066697 [sender.py:send_exit():374] send defer +2022-07-30 08:28:57,457 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:57,458 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:28:57,458 INFO HandlerThread:1066697 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-30 08:28:57,458 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: defer +2022-07-30 08:28:57,458 INFO SenderThread:1066697 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-30 08:28:57,458 INFO 
SenderThread:1066697 [sender.py:transition_state():387] send defer: 1 +2022-07-30 08:28:57,459 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:28:57,459 INFO HandlerThread:1066697 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-30 08:28:57,494 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: defer +2022-07-30 08:28:57,494 INFO SenderThread:1066697 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-30 08:28:57,494 INFO SenderThread:1066697 [sender.py:transition_state():387] send defer: 2 +2022-07-30 08:28:57,494 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:28:57,494 INFO HandlerThread:1066697 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-30 08:28:57,495 DEBUG SenderThread:1066697 [sender.py:send():234] send: stats +2022-07-30 08:28:57,495 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: defer +2022-07-30 08:28:57,495 INFO SenderThread:1066697 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-30 08:28:57,495 INFO SenderThread:1066697 [sender.py:transition_state():387] send defer: 3 +2022-07-30 08:28:57,495 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:28:57,496 INFO HandlerThread:1066697 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-30 08:28:57,496 DEBUG SenderThread:1066697 [sender.py:send():234] send: summary +2022-07-30 08:28:57,496 INFO SenderThread:1066697 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 08:28:57,496 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: defer +2022-07-30 08:28:57,496 INFO SenderThread:1066697 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-30 08:28:57,496 INFO SenderThread:1066697 [sender.py:transition_state():387] send defer: 4 +2022-07-30 08:28:57,497 DEBUG 
HandlerThread:1066697 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:28:57,497 INFO HandlerThread:1066697 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-30 08:28:57,497 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: defer +2022-07-30 08:28:57,497 INFO SenderThread:1066697 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-30 08:28:57,539 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:28:57,539 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/wandb-summary.json +2022-07-30 08:28:57,560 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:57,657 INFO SenderThread:1066697 [sender.py:transition_state():387] send defer: 5 +2022-07-30 08:28:57,657 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:57,658 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:28:57,658 INFO HandlerThread:1066697 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-30 08:28:57,658 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: defer +2022-07-30 08:28:57,658 INFO SenderThread:1066697 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-30 08:28:57,658 INFO SenderThread:1066697 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-30 08:28:57,759 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:58,539 INFO Thread-8 :1066697 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/config.yaml +2022-07-30 08:28:58,540 INFO SenderThread:1066697 
[dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files +2022-07-30 08:28:58,540 INFO SenderThread:1066697 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/config.yaml config.yaml +2022-07-30 08:28:58,541 INFO SenderThread:1066697 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/diff.patch diff.patch +2022-07-30 08:28:58,541 INFO SenderThread:1066697 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/requirements.txt requirements.txt +2022-07-30 08:28:58,542 INFO SenderThread:1066697 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log output.log +2022-07-30 08:28:58,545 INFO SenderThread:1066697 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/wandb-summary.json wandb-summary.json +2022-07-30 08:28:58,545 INFO SenderThread:1066697 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/wandb-metadata.json wandb-metadata.json +2022-07-30 08:28:58,548 INFO SenderThread:1066697 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-30 08:28:58,548 INFO SenderThread:1066697 [sender.py:transition_state():387] send defer: 6 +2022-07-30 08:28:58,548 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:58,555 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:28:58,555 INFO HandlerThread:1066697 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-30 08:28:58,556 DEBUG SenderThread:1066697 [sender.py:send_request():248] 
send_request: defer +2022-07-30 08:28:58,556 INFO SenderThread:1066697 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-30 08:28:58,556 INFO SenderThread:1066697 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 08:28:58,656 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:58,656 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:58,758 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:58,758 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:58,859 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:58,859 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:58,961 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:58,961 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:59,048 INFO Thread-15 :1066697 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/requirements.txt +2022-07-30 08:28:59,063 INFO Thread-17 :1066697 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/wandb-summary.json +2022-07-30 08:28:59,063 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:59,064 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:59,068 INFO Thread-14 :1066697 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/config.yaml +2022-07-30 08:28:59,165 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:59,165 
DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:59,223 INFO Thread-16 :1066697 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/files/output.log +2022-07-30 08:28:59,267 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:59,267 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:59,368 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:59,369 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:59,424 INFO Thread-7 :1066697 [sender.py:transition_state():387] send defer: 7 +2022-07-30 08:28:59,425 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:28:59,425 INFO HandlerThread:1066697 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-30 08:28:59,425 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: defer +2022-07-30 08:28:59,425 INFO SenderThread:1066697 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-30 08:28:59,470 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:59,882 INFO SenderThread:1066697 [sender.py:transition_state():387] send defer: 8 +2022-07-30 08:28:59,882 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:59,883 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:28:59,883 INFO HandlerThread:1066697 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-30 08:28:59,883 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: defer +2022-07-30 08:28:59,883 INFO SenderThread:1066697 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-30 08:28:59,884 INFO 
SenderThread:1066697 [sender.py:transition_state():387] send defer: 9 +2022-07-30 08:28:59,884 DEBUG SenderThread:1066697 [sender.py:send():234] send: final +2022-07-30 08:28:59,884 DEBUG SenderThread:1066697 [sender.py:send():234] send: footer +2022-07-30 08:28:59,885 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:28:59,885 INFO HandlerThread:1066697 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-30 08:28:59,885 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: defer +2022-07-30 08:28:59,885 INFO SenderThread:1066697 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-30 08:28:59,984 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:28:59,984 DEBUG SenderThread:1066697 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:28:59,984 INFO SenderThread:1066697 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 08:29:00,256 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: get_summary +2022-07-30 08:29:00,256 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-30 08:29:00,257 DEBUG HandlerThread:1066697 [handler.py:handle_request():130] handle_request: shutdown +2022-07-30 08:29:00,257 INFO HandlerThread:1066697 [handler.py:finish():731] shutting down handler +2022-07-30 08:29:00,885 INFO WriterThread:1066697 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/run-1j14rrmn.wandb +2022-07-30 08:29:01,255 INFO SenderThread:1066697 [sender.py:finish():1070] shutting down sender +2022-07-30 08:29:01,255 INFO SenderThread:1066697 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 08:29:01,255 INFO SenderThread:1066697 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 08:29:01,259 INFO MainThread:1066697 [internal.py:handle_exit():77] 
Internal process exited diff --git a/wandb/run-20220730_081215-1j14rrmn/logs/debug.log b/wandb/run-20220730_081215-1j14rrmn/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..eb86d60db11a05c06ce19b102739a32148297917 --- /dev/null +++ b/wandb/run-20220730_081215-1j14rrmn/logs/debug.log @@ -0,0 +1,148 @@ +2022-07-30 08:12:15,886 INFO MainThread:1065426 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-30 08:12:15,886 INFO MainThread:1065426 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-30 08:12:15,886 INFO MainThread:1065426 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/logs/debug.log +2022-07-30 08:12:15,886 INFO MainThread:1065426 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_081215-1j14rrmn/logs/debug-internal.log +2022-07-30 08:12:15,887 INFO MainThread:1065426 [wandb_init.py:init():404] calling init triggers +2022-07-30 08:12:15,887 INFO MainThread:1065426 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-30 08:12:15,887 INFO MainThread:1065426 [wandb_init.py:init():460] starting backend +2022-07-30 08:12:15,887 INFO MainThread:1065426 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-30 08:12:15,934 INFO MainThread:1065426 [backend.py:ensure_launched():216] starting backend process... 
+2022-07-30 08:12:15,978 INFO MainThread:1065426 [backend.py:ensure_launched():221] started backend process with pid: 1066697 +2022-07-30 08:12:15,980 INFO MainThread:1065426 [wandb_init.py:init():469] backend started and connected +2022-07-30 08:12:15,994 INFO MainThread:1065426 [wandb_init.py:init():533] updated telemetry +2022-07-30 08:12:16,107 INFO MainThread:1065426 [wandb_init.py:init():563] communicating current version +2022-07-30 08:12:16,837 INFO MainThread:1065426 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-30 08:12:16,837 INFO MainThread:1065426 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-30 08:12:17,015 INFO MainThread:1065426 [wandb_init.py:init():606] starting run threads in backend +2022-07-30 08:12:19,382 INFO MainThread:1065426 [wandb_run.py:_console_start():1810] atexit reg +2022-07-30 08:12:19,382 INFO MainThread:1065426 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-30 08:12:19,383 INFO MainThread:1065426 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-30 08:12:19,385 INFO MainThread:1065426 [wandb_run.py:_redirect():1745] Redirects installed. 
+2022-07-30 08:12:19,385 INFO MainThread:1065426 [wandb_init.py:init():633] run started, returning control to user process +2022-07-30 08:28:55,000 INFO MainThread:1065426 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-30 08:28:55,006 INFO MainThread:1065426 [wandb_run.py:_restore():1752] restore +2022-07-30 08:28:57,458 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 74317 +} + +2022-07-30 08:28:57,658 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 74317 +} + +2022-07-30 08:28:58,555 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 380625 +} + +2022-07-30 08:28:58,657 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 380625 +} + +2022-07-30 08:28:58,758 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 380625 + total_bytes: 380625 +} + +2022-07-30 08:28:58,860 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 380625 + total_bytes: 380625 +} + +2022-07-30 08:28:58,962 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 380625 + total_bytes: 380625 +} + +2022-07-30 08:28:59,064 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + 
other_count: 1 +} +pusher_stats { + uploaded_bytes: 380625 + total_bytes: 380625 +} + +2022-07-30 08:28:59,166 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 380625 + total_bytes: 380625 +} + +2022-07-30 08:28:59,268 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 380625 + total_bytes: 380625 +} + +2022-07-30 08:28:59,369 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 380625 + total_bytes: 380625 +} + +2022-07-30 08:28:59,883 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 380625 + total_bytes: 380625 +} + +2022-07-30 08:29:00,255 INFO MainThread:1065426 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 380625 + total_bytes: 380625 +} +local_info { +} + +2022-07-30 08:29:01,839 INFO MainThread:1065426 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220730_081215-1j14rrmn/run-1j14rrmn.wandb b/wandb/run-20220730_081215-1j14rrmn/run-1j14rrmn.wandb new file mode 100644 index 0000000000000000000000000000000000000000..6d438db798a75fadee9d1caa3331331d701b9f43 --- /dev/null +++ b/wandb/run-20220730_081215-1j14rrmn/run-1j14rrmn.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2d31fe4374129f405b586f7d4ac768ff254bb9407ef86c2ca61191d001f87c7 +size 420905 diff --git a/wandb/run-20220730_083825-1jwtqtqg/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220730_083825-1jwtqtqg/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 
index 0000000000000000000000000000000000000000..d846a57f2375127bc169f9735151ad564083efac --- /dev/null +++ b/wandb/run-20220730_083825-1jwtqtqg/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1605 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220730_083825-1jwtqtqg/files/config.yaml b/wandb/run-20220730_083825-1jwtqtqg/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..303548b1b8391310517c1c46afcfd54ce1d94227 --- /dev/null +++ b/wandb/run-20220730_083825-1jwtqtqg/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659170305 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220730_083825-1jwtqtqg/files/diff.patch b/wandb/run-20220730_083825-1jwtqtqg/files/diff.patch new file mode 100644 index 0000000000000000000000000000000000000000..39b46e1762f7e4b4f330f4ff487b597cc5496642 --- /dev/null +++ b/wandb/run-20220730_083825-1jwtqtqg/files/diff.patch @@ -0,0 +1,10 @@ +diff --git a/.gitattributes b/.gitattributes +index 755356a..f0eef4b 100644 +--- a/.gitattributes ++++ b/.gitattributes +@@ -29,3 +29,4 @@ 
saved_model/**/* filter=lfs diff=lfs merge=lfs -text + *.zip filter=lfs diff=lfs merge=lfs -text + *.zstandard filter=lfs diff=lfs merge=lfs -text + *tfevents* filter=lfs diff=lfs merge=lfs -text ++*.wandb filter=lfs diff=lfs merge=lfs -text +\ No newline at end of file diff --git a/wandb/run-20220730_083825-1jwtqtqg/files/output.log b/wandb/run-20220730_083825-1jwtqtqg/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..f5393e7b38aee757804c8230b435f52beae8e578 --- /dev/null +++ b/wandb/run-20220730_083825-1jwtqtqg/files/output.log @@ -0,0 +1,1596 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul30_08-38-21_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, 
+logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=16, +per_device_train_batch_size=16, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 78.80it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 442.23it/s] 
+WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + 
"contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at 
/home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_q', 'bias'), ('project_hid', 'bias'), ('project_hid', 'kernel'), ('quantizer', 'codevectors'), ('quantizer', 'weight_proj', 'kernel'), ('quantizer', 'weight_proj', 'bias'), ('project_q', 'kernel')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
+Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'bias'), ('lm_head', 'kernel')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 6%|███████████ | 534/9523 [00:00<00:01, 5336.47ex/s] +removing punctuation from train split #1: 9%|█████████████████▌ | 842/9523 [00:00<00:01, 8413.07ex/s] +removing punctuation from train split #2: 9%|█████████████████▍ | 838/9523 [00:00<00:01, 8373.56ex/s] +removing punctuation from train split #4: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8069.44ex/s] +removing punctuation from train split #5: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8314.43ex/s] +removing punctuation from train split #6: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8520.45ex/s] +removing punctuation from train split #8: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8426.38ex/s] 
+removing punctuation from train split #7: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8000.68ex/s] +removing punctuation from train split #9: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8342.10ex/s] +removing punctuation from train split #11: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8513.85ex/s] +removing punctuation from train split #10: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8257.75ex/s] +removing punctuation from train split #12: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8343.57ex/s] +removing punctuation from train split #13: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8202.27ex/s] +removing punctuation from train split #14: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8288.07ex/s] +removing 
punctuation from train split #15: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8171.57ex/s] +removing punctuation from train split #16: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8342.00ex/s] +removing punctuation from train split #17: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8211.27ex/s] +removing punctuation from train split #9: 99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 9447/9523 [00:01<00:00, 8221.07ex/s] +removing punctuation from train split #12: 81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 7696/9522 [00:00<00:00, 7880.94ex/s] +removing punctuation from train split #10: 98%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 9369/9523 [00:01<00:00, 8204.75ex/s] +removing punctuation from train split #11: 90%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8610/9523 [00:01<00:00, 8530.61ex/s] +removing punctuation from train split #12: 
90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8570/9522 [00:01<00:00, 8127.58ex/s] +removing punctuation from train split #13: 88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 8390/9522 [00:01<00:00, 8024.73ex/s] +removing punctuation from train split #12: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 9438/9522 [00:01<00:00, 8288.37ex/s] +removing punctuation from train split #15: 79%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 7524/9522 [00:00<00:00, 7888.33ex/s] +removing punctuation from train split #13: 97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 9247/9522 [00:01<00:00, 8184.43ex/s] +removing punctuation from train split #17: 70%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 6654/9522 [00:00<00:00, 8547.53ex/s] +removing punctuation from train split #14: 98%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9339/9522 [00:01<00:00, 8295.07ex/s] +removing punctuation from train split #19: 52%|█████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 4949/9522 [00:00<00:00, 8423.73ex/s] +removing 
punctuation from train split #15: 97%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 9202/9522 [00:01<00:00, 8102.80ex/s] +removing punctuation from train split #18: 55%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 5198/9522 [00:00<00:00, 7093.89ex/s] +removing punctuation from train split #16: 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 9406/9522 [00:01<00:00, 8346.54ex/s] +removing punctuation from train split #18: 64%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 6053/9522 [00:00<00:00, 6973.73ex/s] +removing punctuation from train split #17: 97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 9235/9522 [00:01<00:00, 8258.18ex/s] +removing punctuation from train split #18: 73%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 6918/9522 [00:00<00:00, 7444.23ex/s] +removing punctuation from train split #18: 82%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 7795/9522 [00:01<00:00, 7823.84ex/s] +removing punctuation from train split #20: 80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 7592/9522 [00:00<00:00, 7943.57ex/s] +removing punctuation from train split 
#21: 70%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 6688/9522 [00:00<00:00, 8583.36ex/s] +removing punctuation from train split #29: 0%| | 0/9522 [00:00 to the vocabulary +Adding to the vocabulary +2022-07-30 08:50:45.177810: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2022-07-30 08:50:45.177866: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303) +/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +WARNING:huggingface_hub.repository:/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +run_flax_speech_recognition_ctc.py:333: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) +INFO:__main__:***** Running training ***** +INFO:__main__: Num examples = 302693 +INFO:__main__: Num Epochs = 40 +INFO:__main__: Instantaneous batch size per device = 16 +INFO:__main__: Num gradient accumulation steps = 1 +INFO:__main__: Total train batch size (w. parallel & distributed) = 128 +INFO:__main__: Total optimization steps = 94560 +INFO:__main__: Gradient checkpointing: True +INFO:__main__: Use scan: False +INFO:__main__: Fuse matmuls: False +Epoch ... 
(1/40): 0%| | 0/40 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) + File "/data/flax/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 162, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2158, in cache_miss + out_tree, out_flat = f_pmapped_(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2034, in pmap_f + out = pxla.xla_pmap( + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2022, in bind + return map_bind(self, fun, *args, **params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2054, in map_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2025, in process + return trace.process_map(self, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 687, in process_call + return primitive.impl(f, *tracers, **params) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 829, in xla_pmap_impl + compiled_fun, fingerprint = parallel_callable( + File "/data/flax/lib/python3.8/site-packages/jax/linear_util.py", line 295, in memoized_fun + ans = call(fun, *args) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 860, in parallel_callable + pmap_executable = pmap_computation.compile() + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1137, in compile + self._executable = PmapExecutable.from_hlo(self._hlo, **self.compile_args) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1285, in from_hlo + compiled = dispatch.compile_or_get_cached( + File 
"/data/flax/lib/python3.8/site-packages/jax/_src/dispatch.py", line 899, in compile_or_get_cached + return backend_compile(backend, computation, compile_options, host_callbacks) + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/dispatch.py", line 843, in backend_compile + return backend.compile(built_c, compile_options=options) +jax._src.traceback_util.UnfilteredStackTrace: jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: XLA:TPU compile permanent error. Ran out of memory in memory space hbm. Used 17.42G of 15.48G hbm. Exceeded hbm capacity by 1.94G. +Total hbm usage >= 17.94G: + reserved 530.00M + program 6.62G + arguments 10.80G +Output size 10.76G; shares 10.76G with arguments. +Program hbm requirement 6.62G: + global 180.0K + scoped 72.08M + HLO temp 6.55G (99.2% utilization: Unpadded (5.76G) Padded (5.80G), 11.4% fragmentation (766.16M)) + Largest program allocations in hbm: + 1. Size: 1000.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/35/remat(core_fn)/35/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: f32[16,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 974.61M + Extra memory due to padding: 25.39M (1.0x expansion) + XLA label: fusion.181.remat6 = fusion(bitcast.7446, bitcast.7444, fusion.14564), kind=kOutput, calls=fused_computation.177.clone.clone.clone.clone.clone.clone + Allocation type: HLO temp + ========================== + 2. 
Size: 312.19M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/35/remat(core_fn)/35/feed_forward/intermediate_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,5120]{2,0,1:T(8,128)} + Unpadded size: 312.19M + XLA label: fusion.1572.remat = fusion(fusion.6366, get-tuple-element.20837, bitcast.11078), kind=kOutput, calls=fused_computation.1412.clone + Allocation type: HLO temp + ========================== + 3. Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/35/remat(core_fn)/35/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (1,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[16,999,5120]{2,0,1:T(8,128)(2,1)} + Unpadded size: 156.09M + XLA label: fusion.24057 = fusion(fusion.1572.remat, get-tuple-element.20812, get-tuple-element.20811, get-tuple-element.20846, ...(+1)), kind=kOutput, calls=fused_computation.18874 + Allocation type: HLO temp + ========================== + 4. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/36/remat(core_fn)/36/layer_norm/add_any" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.7158 = fusion(get-tuple-element.9865, get-tuple-element.20385, get-tuple-element.10106, get-tuple-element.20384, ...(+3)), kind=kLoop, calls=fused_computation.6602 + Allocation type: HLO temp + ========================== + 5. 
Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/16/16/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20090 = fusion(get-tuple-element.11264, get-tuple-element.13336, fusion.1629, bitcast.10986), kind=kOutput, calls=fused_computation.18656 + Allocation type: HLO temp + ========================== + 6. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/34/34/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20054 = fusion(fusion.6764.remat, get-tuple-element.13656, fusion.1611, bitcast.10950), kind=kOutput, calls=fused_computation.18620 + Allocation type: HLO temp + ========================== + 7. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/33/33/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20056 = fusion(fusion.6766.remat, get-tuple-element.13640, fusion.1612, bitcast.10952), kind=kOutput, calls=fused_computation.18622 + Allocation type: HLO temp + ========================== + 8. 
Size: 78.05M + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: copy.17574 = copy(bitcast.14536) + Allocation type: HLO temp + ========================== + 9. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/layer_norm/reduce_sum[axes=(2,)]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20124 = fusion(fusion.6836, copy.17574, get-tuple-element.13966, fusion.8945, ...(+1)), kind=kLoop, calls=fused_computation.18690 + Allocation type: HLO temp + ========================== + 10. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/35/remat(core_fn)/35/attention/out_proj/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.6660.remat2 = fusion(get-tuple-element.20844, copy.15230.remat2, get-tuple-element.20831, bitcast.11077), kind=kOutput, calls=fused_computation.6104.clone.clone + Allocation type: HLO temp + ========================== + 11. 
Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20122 = fusion(get-tuple-element.11200, get-tuple-element.13208, fusion.1645, bitcast.11018), kind=kOutput, calls=fused_computation.18688 + Allocation type: HLO temp + ========================== + 12. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/1/1/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20120 = fusion(get-tuple-element.11204, get-tuple-element.13224, fusion.1644, bitcast.11016), kind=kOutput, calls=fused_computation.18686 + Allocation type: HLO temp + ========================== + 13. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/2/2/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20118 = fusion(get-tuple-element.11208, get-tuple-element.13400, fusion.1643, bitcast.11014), kind=kOutput, calls=fused_computation.18684 + Allocation type: HLO temp + ========================== + 14. 
Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/3/3/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20116 = fusion(get-tuple-element.11212, get-tuple-element.13576, fusion.1642, bitcast.11012), kind=kOutput, calls=fused_computation.18682 + Allocation type: HLO temp + ========================== + 15. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/4/4/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20114 = fusion(get-tuple-element.11216, get-tuple-element.13752, fusion.1641, bitcast.11010), kind=kOutput, calls=fused_computation.18680 + Allocation type: HLO temp + ========================== + 16. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/5/5/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20112 = fusion(get-tuple-element.11220, get-tuple-element.13896, fusion.1640, bitcast.11008), kind=kOutput, calls=fused_computation.18678 + Allocation type: HLO temp + ========================== + 17. 
Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/6/6/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20110 = fusion(get-tuple-element.11224, get-tuple-element.13912, fusion.1639, bitcast.11006), kind=kOutput, calls=fused_computation.18676 + Allocation type: HLO temp + ========================== + 18. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/7/7/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20108 = fusion(get-tuple-element.11228, get-tuple-element.13928, fusion.1638, bitcast.11004), kind=kOutput, calls=fused_computation.18674 + Allocation type: HLO temp + ========================== + 19. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/8/8/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20106 = fusion(get-tuple-element.11232, get-tuple-element.13944, fusion.1637, bitcast.11002), kind=kOutput, calls=fused_computation.18672 + Allocation type: HLO temp + ========================== + 20. 
Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/9/9/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20104 = fusion(get-tuple-element.11236, get-tuple-element.13960, fusion.1636, bitcast.11000), kind=kOutput, calls=fused_computation.18670 + Allocation type: HLO temp + ========================== +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1605, in + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) +jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: XLA:TPU compile permanent error. Ran out of memory in memory space hbm. Used 17.42G of 15.48G hbm. Exceeded hbm capacity by 1.94G. +Total hbm usage >= 17.94G: + reserved 530.00M + program 6.62G + arguments 10.80G +Output size 10.76G; shares 10.76G with arguments. +Program hbm requirement 6.62G: + global 180.0K + scoped 72.08M + HLO temp 6.55G (99.2% utilization: Unpadded (5.76G) Padded (5.80G), 11.4% fragmentation (766.16M)) + Largest program allocations in hbm: + 1. 
Size: 1000.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/35/remat(core_fn)/35/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: f32[16,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 974.61M + Extra memory due to padding: 25.39M (1.0x expansion) + XLA label: fusion.181.remat6 = fusion(bitcast.7446, bitcast.7444, fusion.14564), kind=kOutput, calls=fused_computation.177.clone.clone.clone.clone.clone.clone + Allocation type: HLO temp + ========================== + 2. Size: 312.19M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/35/remat(core_fn)/35/feed_forward/intermediate_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,5120]{2,0,1:T(8,128)} + Unpadded size: 312.19M + XLA label: fusion.1572.remat = fusion(fusion.6366, get-tuple-element.20837, bitcast.11078), kind=kOutput, calls=fused_computation.1412.clone + Allocation type: HLO temp + ========================== + 3. 
Size: 156.09M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/35/remat(core_fn)/35/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (1,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[16,999,5120]{2,0,1:T(8,128)(2,1)} + Unpadded size: 156.09M + XLA label: fusion.24057 = fusion(fusion.1572.remat, get-tuple-element.20812, get-tuple-element.20811, get-tuple-element.20846, ...(+1)), kind=kOutput, calls=fused_computation.18874 + Allocation type: HLO temp + ========================== + 4. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/36/remat(core_fn)/36/layer_norm/add_any" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.7158 = fusion(get-tuple-element.9865, get-tuple-element.20385, get-tuple-element.10106, get-tuple-element.20384, ...(+3)), kind=kLoop, calls=fused_computation.6602 + Allocation type: HLO temp + ========================== + 5. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/16/16/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20090 = fusion(get-tuple-element.11264, get-tuple-element.13336, fusion.1629, bitcast.10986), kind=kOutput, calls=fused_computation.18656 + Allocation type: HLO temp + ========================== + 6. 
Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/34/34/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20054 = fusion(fusion.6764.remat, get-tuple-element.13656, fusion.1611, bitcast.10950), kind=kOutput, calls=fused_computation.18620 + Allocation type: HLO temp + ========================== + 7. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/33/33/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20056 = fusion(fusion.6766.remat, get-tuple-element.13640, fusion.1612, bitcast.10952), kind=kOutput, calls=fused_computation.18622 + Allocation type: HLO temp + ========================== + 8. Size: 78.05M + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: copy.17574 = copy(bitcast.14536) + Allocation type: HLO temp + ========================== + 9. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/layer_norm/reduce_sum[axes=(2,)]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20124 = fusion(fusion.6836, copy.17574, get-tuple-element.13966, fusion.8945, ...(+1)), kind=kLoop, calls=fused_computation.18690 + Allocation type: HLO temp + ========================== + 10. 
Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/35/remat(core_fn)/35/attention/out_proj/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.6660.remat2 = fusion(get-tuple-element.20844, copy.15230.remat2, get-tuple-element.20831, bitcast.11077), kind=kOutput, calls=fused_computation.6104.clone.clone + Allocation type: HLO temp + ========================== + 11. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20122 = fusion(get-tuple-element.11200, get-tuple-element.13208, fusion.1645, bitcast.11018), kind=kOutput, calls=fused_computation.18688 + Allocation type: HLO temp + ========================== + 12. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/1/1/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20120 = fusion(get-tuple-element.11204, get-tuple-element.13224, fusion.1644, bitcast.11016), kind=kOutput, calls=fused_computation.18686 + Allocation type: HLO temp + ========================== + 13. 
Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/2/2/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20118 = fusion(get-tuple-element.11208, get-tuple-element.13400, fusion.1643, bitcast.11014), kind=kOutput, calls=fused_computation.18684 + Allocation type: HLO temp + ========================== + 14. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/3/3/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20116 = fusion(get-tuple-element.11212, get-tuple-element.13576, fusion.1642, bitcast.11012), kind=kOutput, calls=fused_computation.18682 + Allocation type: HLO temp + ========================== + 15. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/4/4/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20114 = fusion(get-tuple-element.11216, get-tuple-element.13752, fusion.1641, bitcast.11010), kind=kOutput, calls=fused_computation.18680 + Allocation type: HLO temp + ========================== + 16. 
Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/5/5/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20112 = fusion(get-tuple-element.11220, get-tuple-element.13896, fusion.1640, bitcast.11008), kind=kOutput, calls=fused_computation.18678 + Allocation type: HLO temp + ========================== + 17. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/6/6/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20110 = fusion(get-tuple-element.11224, get-tuple-element.13912, fusion.1639, bitcast.11006), kind=kOutput, calls=fused_computation.18676 + Allocation type: HLO temp + ========================== + 18. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/7/7/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20108 = fusion(get-tuple-element.11228, get-tuple-element.13928, fusion.1638, bitcast.11004), kind=kOutput, calls=fused_computation.18674 + Allocation type: HLO temp + ========================== + 19. 
Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/8/8/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20106 = fusion(get-tuple-element.11232, get-tuple-element.13944, fusion.1637, bitcast.11002), kind=kOutput, calls=fused_computation.18672 + Allocation type: HLO temp + ========================== + 20. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/9/9/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[16,999,1280]{2,0,1:T(8,128)} + Unpadded size: 78.05M + XLA label: fusion.20104 = fusion(get-tuple-element.11236, get-tuple-element.13960, fusion.1636, bitcast.11000), kind=kOutput, calls=fused_computation.18670 + Allocation type: HLO temp + ========================== \ No newline at end of file diff --git a/wandb/run-20220730_083825-1jwtqtqg/files/requirements.txt b/wandb/run-20220730_083825-1jwtqtqg/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ef78cbdea431c3d66bc5af51443394cb7955eab --- /dev/null +++ b/wandb/run-20220730_083825-1jwtqtqg/files/requirements.txt @@ -0,0 +1,158 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 
+contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +exceptiongroup==1.0.0rc8 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +hypothesis==6.53.0 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +jiwer==2.3.0 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.4.0 +pygments==2.11.1 +pygtrie==2.5.0 +pyparsing==3.0.6 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 
+tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220730_083825-1jwtqtqg/files/wandb-metadata.json b/wandb/run-20220730_083825-1jwtqtqg/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8a9168dc9d1d93de8b629db4bd1ae50782a9539e --- /dev/null +++ b/wandb/run-20220730_083825-1jwtqtqg/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-30T08:38:29.242261", + "startedAt": "2022-07-30T08:38:25.752508", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=./", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=16", + "--per_device_eval_batch_size=16", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + 
"--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220730_083825-1jwtqtqg/files/wandb-summary.json b/wandb/run-20220730_083825-1jwtqtqg/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..a7ef6c1b2317e4698b2d4108e7c29c613ad653db --- /dev/null +++ b/wandb/run-20220730_083825-1jwtqtqg/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 1012}} \ No newline at end of file diff --git a/wandb/run-20220730_083825-1jwtqtqg/logs/debug-internal.log b/wandb/run-20220730_083825-1jwtqtqg/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..438502a84c016369287cabda9173a7fc3e0fceb3 --- /dev/null +++ b/wandb/run-20220730_083825-1jwtqtqg/logs/debug-internal.log @@ -0,0 +1,556 @@ +2022-07-30 08:38:26,680 INFO MainThread:87547 [internal.py:wandb_internal():87] W&B internal server running at pid: 87547, started at: 2022-07-30 08:38:26.680310 +2022-07-30 08:38:26,682 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: check_version +2022-07-30 08:38:26,683 DEBUG 
SenderThread:87547 [sender.py:send():234] send: header +2022-07-30 08:38:26,683 INFO WriterThread:87547 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/run-1jwtqtqg.wandb +2022-07-30 08:38:26,683 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: check_version +2022-07-30 08:38:26,721 DEBUG SenderThread:87547 [sender.py:send():234] send: run +2022-07-30 08:38:26,926 INFO SenderThread:87547 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files +2022-07-30 08:38:26,926 INFO SenderThread:87547 [sender.py:_start_run_threads():804] run started: 1jwtqtqg with start time 1659170305 +2022-07-30 08:38:26,926 DEBUG SenderThread:87547 [sender.py:send():234] send: summary +2022-07-30 08:38:26,927 INFO SenderThread:87547 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 08:38:26,928 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: run_start +2022-07-30 08:38:27,928 INFO Thread-8 :87547 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/wandb-summary.json +2022-07-30 08:38:29,241 DEBUG HandlerThread:87547 [meta.py:__init__():40] meta init +2022-07-30 08:38:29,242 DEBUG HandlerThread:87547 [meta.py:__init__():54] meta init done +2022-07-30 08:38:29,242 DEBUG HandlerThread:87547 [meta.py:probe():214] probe +2022-07-30 08:38:29,244 DEBUG HandlerThread:87547 [meta.py:_setup_git():204] setup git +2022-07-30 08:38:29,289 DEBUG HandlerThread:87547 [meta.py:_setup_git():211] setup git done +2022-07-30 08:38:29,289 DEBUG HandlerThread:87547 [meta.py:_save_code():92] save code +2022-07-30 08:38:29,303 DEBUG HandlerThread:87547 [meta.py:_save_code():113] save code done +2022-07-30 08:38:29,303 DEBUG HandlerThread:87547 [meta.py:_save_patches():130] save patches +2022-07-30 08:38:29,384 DEBUG HandlerThread:87547 
[meta.py:_save_patches():172] save patches done +2022-07-30 08:38:29,384 DEBUG HandlerThread:87547 [meta.py:_save_pip():58] save pip +2022-07-30 08:38:29,385 DEBUG HandlerThread:87547 [meta.py:_save_pip():72] save pip done +2022-07-30 08:38:29,385 DEBUG HandlerThread:87547 [meta.py:probe():252] probe done +2022-07-30 08:38:29,388 DEBUG SenderThread:87547 [sender.py:send():234] send: files +2022-07-30 08:38:29,389 INFO SenderThread:87547 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-30 08:38:29,389 INFO SenderThread:87547 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-30 08:38:29,390 INFO SenderThread:87547 [sender.py:_save_file():939] saving file diff.patch with policy now +2022-07-30 08:38:29,395 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:38:29,396 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:38:29,884 INFO Thread-11 :87547 [upload_job.py:push():137] Uploaded file /tmp/tmps4tk8s_gwandb/331ec8hl-wandb-metadata.json +2022-07-30 08:38:29,931 INFO Thread-8 :87547 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/diff.patch +2022-07-30 08:38:29,931 INFO Thread-8 :87547 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:38:29,931 INFO Thread-8 :87547 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/wandb-metadata.json +2022-07-30 08:38:29,931 INFO Thread-8 :87547 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/code/run_flax_speech_recognition_ctc.py +2022-07-30 08:38:29,931 INFO Thread-8 :87547 [dir_watcher.py:_on_file_created():217] 
file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/requirements.txt +2022-07-30 08:38:29,931 INFO Thread-8 :87547 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/code +2022-07-30 08:38:29,935 INFO Thread-13 :87547 [upload_job.py:push():137] Uploaded file /tmp/tmps4tk8s_gwandb/1tu8yniu-diff.patch +2022-07-30 08:38:30,103 INFO Thread-12 :87547 [upload_job.py:push():137] Uploaded file /tmp/tmps4tk8s_gwandb/69l0n8jf-code/run_flax_speech_recognition_ctc.py +2022-07-30 08:38:31,932 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:38:33,932 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:38:35,934 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:38:37,935 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:38:43,938 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:38:44,561 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:38:44,562 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:38:45,939 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:38:57,332 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 
08:38:57,943 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:38:59,702 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:38:59,703 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:38:59,944 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:39:01,945 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:39:10,949 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:39:12,950 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:39:14,860 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:39:14,860 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:39:14,951 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:39:27,405 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:39:28,957 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:39:30,001 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:39:30,001 DEBUG SenderThread:87547 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 08:39:45,152 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:39:45,153 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:39:57,471 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:40:00,306 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:40:00,307 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:40:12,975 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:14,976 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:15,474 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:40:15,475 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:40:16,977 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:18,977 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:20,978 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:22,979 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:24,980 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:26,981 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:27,540 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:40:28,982 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:30,655 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:40:30,655 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:40:30,983 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:32,984 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:34,985 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:36,986 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:38,987 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:40,988 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:42,990 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:44,992 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:46,024 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:40:46,024 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:40:46,993 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:48,994 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:50,995 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:52,996 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:54,997 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:56,998 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:40:57,607 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:40:58,999 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:01,000 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:01,159 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:41:01,159 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:41:03,001 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:05,003 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:07,004 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:09,005 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:11,006 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:13,007 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:15,008 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:16,308 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:41:16,308 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:41:17,009 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log 
+2022-07-30 08:41:19,010 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:21,011 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:23,012 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:25,013 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:27,014 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:27,686 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:41:29,018 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:31,019 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:31,471 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:41:31,471 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:41:33,020 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:35,021 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:37,022 INFO 
Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:39,023 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:41,024 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:43,025 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:45,026 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:46,608 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:41:46,608 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:41:47,028 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:49,028 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:51,029 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:53,030 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:55,031 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:57,036 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:41:57,762 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:41:59,037 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:01,038 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:01,751 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:42:01,752 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:42:03,039 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:05,040 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:07,041 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:09,042 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:11,043 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:13,044 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:15,046 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:16,897 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:42:16,898 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:42:17,046 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:19,047 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:21,048 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:23,049 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:25,050 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:27,051 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:27,839 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:42:29,052 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:32,043 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 
08:42:32,043 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:42:32,054 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:34,055 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:36,056 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:38,058 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:40,058 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:42,060 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:44,061 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:46,062 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:47,185 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:42:47,185 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:42:48,063 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 
08:42:50,064 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:52,065 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:54,066 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:56,067 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:42:57,924 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:42:58,068 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:00,069 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:02,070 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:02,347 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:43:02,348 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:43:04,071 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:06,072 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:08,073 INFO Thread-8 
:87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:10,074 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:12,076 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:14,077 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:16,078 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:17,491 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:43:17,491 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:43:18,080 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:20,081 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:22,082 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:24,083 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:26,085 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:28,002 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:43:28,086 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:30,088 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:32,088 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:32,633 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:43:32,633 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:43:34,089 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:36,090 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:38,091 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:40,092 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:42,093 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:44,094 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:46,095 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:47,771 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:43:47,772 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:43:48,097 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:50,098 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:52,099 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:54,100 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:56,101 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:43:58,085 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:43:58,102 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:00,103 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:02,104 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:02,924 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:44:02,925 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:44:04,106 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:06,107 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:08,108 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:10,109 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:12,110 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:14,111 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:16,113 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:18,074 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:44:18,075 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:44:18,114 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log 
+2022-07-30 08:44:20,115 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:22,116 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:24,117 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:26,118 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:28,119 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:28,166 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:44:30,120 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:32,121 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:33,222 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:44:33,222 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:44:34,122 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:36,123 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:38,124 INFO 
Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:40,125 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:42,126 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:44,127 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:46,128 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:48,129 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:48,359 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:44:48,359 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:44:50,130 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:52,131 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:54,133 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:56,135 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:58,137 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:44:58,256 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:45:00,138 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:02,139 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:03,530 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:45:03,530 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:45:04,140 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:06,141 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:08,142 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:11,143 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:13,144 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:15,145 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:17,146 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:18,671 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:45:18,672 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:45:19,147 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:21,148 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:23,149 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:25,151 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:27,152 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:28,334 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:45:29,153 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:31,155 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:33,156 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:33,836 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:45:33,836 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:45:35,157 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:37,158 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:39,159 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:41,160 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:43,161 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:45,162 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:47,162 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:45:48,981 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:45:48,981 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:45:49,163 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log 
+2022-07-30 08:45:58,416 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:46:04,118 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:46:04,119 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:46:19,477 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:46:19,477 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:46:25,181 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:27,182 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:28,496 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:46:29,183 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:31,184 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:33,185 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:34,735 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:46:34,735 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:46:35,187 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:37,188 INFO Thread-8 :87547 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:39,189 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:41,190 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:43,191 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:45,192 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:47,193 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:49,194 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:49,900 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:46:49,900 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:46:51,195 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:53,196 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:55,197 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:46:58,577 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:46:59,199 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:47:01,200 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:47:05,046 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:47:05,046 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:47:05,202 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:47:07,203 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:47:09,204 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:47:11,205 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:47:13,206 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:47:20,187 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:47:20,187 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:47:28,648 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 
08:47:35,320 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:47:35,320 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:47:43,220 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:47:50,672 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:47:50,672 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:47:54,225 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:47:56,226 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:47:58,227 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:47:58,722 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:48:00,228 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:02,229 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:04,230 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:05,900 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:48:05,900 DEBUG SenderThread:87547 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 08:48:06,231 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:08,232 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:10,233 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:12,234 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:14,235 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:16,236 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:18,237 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:20,238 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:21,039 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:48:21,039 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:48:22,240 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:26,242 INFO Thread-8 :87547 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:28,243 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:28,802 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:48:30,244 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:48:36,185 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:48:36,185 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:48:51,341 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:48:51,341 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:48:58,878 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:49:06,476 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:49:06,477 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:49:14,269 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:49:21,615 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:49:21,616 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:49:28,952 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:49:36,750 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:49:36,750 DEBUG SenderThread:87547 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 08:49:51,888 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:49:51,888 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:49:59,026 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:50:05,291 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:50:07,073 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:50:07,073 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:50:13,295 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:50:22,299 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:50:22,333 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:50:22,333 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:50:29,103 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:50:30,303 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:50:36,306 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:50:37,755 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:50:37,755 DEBUG SenderThread:87547 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 08:50:45,311 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:50:47,312 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:50:51,314 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:50:52,966 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:50:52,966 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:50:55,316 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:50:57,317 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:50:59,182 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:51:03,320 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:51:08,255 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:51:08,255 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:51:23,531 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:51:23,531 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:51:29,261 DEBUG SenderThread:87547 [sender.py:send():234] send: stats 
+2022-07-30 08:51:37,334 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:51:38,693 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:51:38,694 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:51:45,337 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:51:47,338 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:51:53,869 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:51:53,870 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:51:59,339 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:52:09,018 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:52:09,018 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:52:24,153 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:52:24,153 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:52:29,417 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:52:39,288 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:52:39,288 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:52:54,426 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:52:54,426 DEBUG SenderThread:87547 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 08:52:59,496 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:53:09,558 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:53:09,559 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:53:24,692 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:53:24,693 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:53:29,575 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:53:39,829 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:53:39,829 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:53:54,966 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:53:54,967 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:53:59,650 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:54:10,100 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:54:10,100 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:54:25,249 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:54:25,250 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:54:29,728 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:54:40,383 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:54:40,384 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:54:55,518 DEBUG HandlerThread:87547 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:54:55,518 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:54:59,952 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:55:10,659 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 08:55:10,659 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: stop_status +2022-07-30 08:55:19,446 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:55:19,859 DEBUG SenderThread:87547 [sender.py:send():234] send: telemetry +2022-07-30 08:55:19,859 DEBUG SenderThread:87547 [sender.py:send():234] send: exit +2022-07-30 08:55:19,859 INFO SenderThread:87547 [sender.py:send_exit():366] handling exit code: 1 +2022-07-30 08:55:19,861 INFO SenderThread:87547 [sender.py:send_exit():368] handling runtime: 1012 +2022-07-30 08:55:19,861 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:19,861 INFO SenderThread:87547 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 08:55:19,862 INFO SenderThread:87547 [sender.py:send_exit():374] send defer +2022-07-30 08:55:19,862 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:19,863 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:55:19,863 INFO HandlerThread:87547 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-30 08:55:19,863 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: defer +2022-07-30 08:55:19,863 INFO SenderThread:87547 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-30 08:55:19,863 INFO SenderThread:87547 [sender.py:transition_state():387] send defer: 1 +2022-07-30 
08:55:19,864 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:55:19,864 INFO HandlerThread:87547 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-30 08:55:19,905 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: defer +2022-07-30 08:55:19,905 INFO SenderThread:87547 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-30 08:55:19,905 INFO SenderThread:87547 [sender.py:transition_state():387] send defer: 2 +2022-07-30 08:55:19,905 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:55:19,905 INFO HandlerThread:87547 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-30 08:55:19,905 DEBUG SenderThread:87547 [sender.py:send():234] send: stats +2022-07-30 08:55:19,906 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: defer +2022-07-30 08:55:19,906 INFO SenderThread:87547 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-30 08:55:19,906 INFO SenderThread:87547 [sender.py:transition_state():387] send defer: 3 +2022-07-30 08:55:19,906 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:55:19,906 INFO HandlerThread:87547 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-30 08:55:19,907 DEBUG SenderThread:87547 [sender.py:send():234] send: summary +2022-07-30 08:55:19,907 INFO SenderThread:87547 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 08:55:19,907 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: defer +2022-07-30 08:55:19,907 INFO SenderThread:87547 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-30 08:55:19,907 INFO SenderThread:87547 [sender.py:transition_state():387] send defer: 4 +2022-07-30 08:55:19,908 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:55:19,908 INFO 
HandlerThread:87547 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-30 08:55:19,908 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: defer +2022-07-30 08:55:19,908 INFO SenderThread:87547 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-30 08:55:19,965 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:20,076 INFO SenderThread:87547 [sender.py:transition_state():387] send defer: 5 +2022-07-30 08:55:20,076 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:20,077 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:55:20,077 INFO HandlerThread:87547 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-30 08:55:20,077 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: defer +2022-07-30 08:55:20,077 INFO SenderThread:87547 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-30 08:55:20,078 INFO SenderThread:87547 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-30 08:55:20,178 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:20,446 INFO Thread-8 :87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/config.yaml +2022-07-30 08:55:20,447 INFO SenderThread:87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:55:20,447 INFO SenderThread:87547 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/wandb-summary.json +2022-07-30 08:55:20,447 INFO SenderThread:87547 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files +2022-07-30 08:55:20,447 INFO 
SenderThread:87547 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/config.yaml config.yaml +2022-07-30 08:55:20,447 INFO SenderThread:87547 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/diff.patch diff.patch +2022-07-30 08:55:20,447 INFO SenderThread:87547 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/requirements.txt requirements.txt +2022-07-30 08:55:20,448 INFO SenderThread:87547 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log output.log +2022-07-30 08:55:20,448 INFO SenderThread:87547 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/wandb-summary.json wandb-summary.json +2022-07-30 08:55:20,448 INFO SenderThread:87547 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/wandb-metadata.json wandb-metadata.json +2022-07-30 08:55:20,451 INFO SenderThread:87547 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-30 08:55:20,451 INFO SenderThread:87547 [sender.py:transition_state():387] send defer: 6 +2022-07-30 08:55:20,451 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:20,460 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:55:20,461 INFO HandlerThread:87547 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-30 08:55:20,461 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: defer +2022-07-30 08:55:20,461 INFO SenderThread:87547 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-30 08:55:20,461 INFO 
SenderThread:87547 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 08:55:20,555 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:20,556 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:20,657 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:20,657 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:20,758 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:20,758 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:20,860 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:20,860 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:20,915 INFO Thread-15 :87547 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/requirements.txt +2022-07-30 08:55:20,950 INFO Thread-17 :87547 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/wandb-summary.json +2022-07-30 08:55:20,957 INFO Thread-14 :87547 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/config.yaml +2022-07-30 08:55:20,962 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:20,962 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:21,063 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:21,063 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:21,121 INFO Thread-16 :87547 [upload_job.py:push():137] Uploaded file 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/files/output.log +2022-07-30 08:55:21,165 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:21,165 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:21,266 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:21,266 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:21,322 INFO Thread-7 :87547 [sender.py:transition_state():387] send defer: 7 +2022-07-30 08:55:21,322 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:55:21,322 INFO HandlerThread:87547 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-30 08:55:21,323 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: defer +2022-07-30 08:55:21,323 INFO SenderThread:87547 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-30 08:55:21,367 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:21,810 INFO SenderThread:87547 [sender.py:transition_state():387] send defer: 8 +2022-07-30 08:55:21,810 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:21,811 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:55:21,811 INFO HandlerThread:87547 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-30 08:55:21,812 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: defer +2022-07-30 08:55:21,812 INFO SenderThread:87547 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-30 08:55:21,812 INFO SenderThread:87547 [sender.py:transition_state():387] send defer: 9 +2022-07-30 08:55:21,812 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: defer +2022-07-30 08:55:21,812 
INFO HandlerThread:87547 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-30 08:55:21,813 DEBUG SenderThread:87547 [sender.py:send():234] send: final +2022-07-30 08:55:21,813 DEBUG SenderThread:87547 [sender.py:send():234] send: footer +2022-07-30 08:55:21,813 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: defer +2022-07-30 08:55:21,813 INFO SenderThread:87547 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-30 08:55:21,912 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 08:55:21,912 DEBUG SenderThread:87547 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 08:55:21,913 INFO SenderThread:87547 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 08:55:22,166 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: get_summary +2022-07-30 08:55:22,167 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-30 08:55:22,168 DEBUG HandlerThread:87547 [handler.py:handle_request():130] handle_request: shutdown +2022-07-30 08:55:22,168 INFO HandlerThread:87547 [handler.py:finish():731] shutting down handler +2022-07-30 08:55:22,813 INFO WriterThread:87547 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/run-1jwtqtqg.wandb +2022-07-30 08:55:23,165 INFO SenderThread:87547 [sender.py:finish():1070] shutting down sender +2022-07-30 08:55:23,166 INFO SenderThread:87547 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 08:55:23,166 INFO SenderThread:87547 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 08:55:23,169 INFO MainThread:87547 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220730_083825-1jwtqtqg/logs/debug.log b/wandb/run-20220730_083825-1jwtqtqg/logs/debug.log new file mode 100644 index 
0000000000000000000000000000000000000000..ce1416381bfb6880eda6908eca34a96692623a71 --- /dev/null +++ b/wandb/run-20220730_083825-1jwtqtqg/logs/debug.log @@ -0,0 +1,148 @@ +2022-07-30 08:38:25,754 INFO MainThread:86199 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-30 08:38:25,754 INFO MainThread:86199 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-30 08:38:25,754 INFO MainThread:86199 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/logs/debug.log +2022-07-30 08:38:25,754 INFO MainThread:86199 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_083825-1jwtqtqg/logs/debug-internal.log +2022-07-30 08:38:25,754 INFO MainThread:86199 [wandb_init.py:init():404] calling init triggers +2022-07-30 08:38:25,754 INFO MainThread:86199 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-30 08:38:25,754 INFO MainThread:86199 [wandb_init.py:init():460] starting backend +2022-07-30 08:38:25,754 INFO MainThread:86199 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-30 08:38:25,812 INFO MainThread:86199 [backend.py:ensure_launched():216] starting backend process... +2022-07-30 08:38:25,858 INFO MainThread:86199 [backend.py:ensure_launched():221] started backend process with pid: 87547 +2022-07-30 08:38:25,860 INFO MainThread:86199 [wandb_init.py:init():469] backend started and connected +2022-07-30 08:38:25,875 INFO MainThread:86199 [wandb_init.py:init():533] updated telemetry +2022-07-30 08:38:25,991 INFO MainThread:86199 [wandb_init.py:init():563] communicating current version +2022-07-30 08:38:26,719 INFO MainThread:86199 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! 
To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-30 08:38:26,720 INFO MainThread:86199 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-30 08:38:26,927 INFO MainThread:86199 [wandb_init.py:init():606] starting run threads in backend +2022-07-30 08:38:29,392 INFO MainThread:86199 [wandb_run.py:_console_start():1810] atexit reg +2022-07-30 08:38:29,393 INFO MainThread:86199 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-30 08:38:29,393 INFO MainThread:86199 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-30 08:38:29,395 INFO MainThread:86199 [wandb_run.py:_redirect():1745] Redirects installed. +2022-07-30 08:38:29,395 INFO MainThread:86199 [wandb_init.py:init():633] run started, returning control to user process +2022-07-30 08:55:17,539 INFO MainThread:86199 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-30 08:55:17,545 INFO MainThread:86199 [wandb_run.py:_restore():1752] restore +2022-07-30 08:55:19,863 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 74317 +} + +2022-07-30 08:55:20,077 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 74317 +} + +2022-07-30 08:55:20,454 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 402992 +} + +2022-07-30 08:55:20,556 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 402992 +} + +2022-07-30 08:55:20,657 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + 
wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 402992 + total_bytes: 402992 +} + +2022-07-30 08:55:20,759 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 402992 + total_bytes: 402992 +} + +2022-07-30 08:55:20,861 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 402992 + total_bytes: 402992 +} + +2022-07-30 08:55:20,962 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 402992 + total_bytes: 402992 +} + +2022-07-30 08:55:21,064 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 402992 + total_bytes: 402992 +} + +2022-07-30 08:55:21,165 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 402992 + total_bytes: 402992 +} + +2022-07-30 08:55:21,267 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 402992 + total_bytes: 402992 +} + +2022-07-30 08:55:21,811 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 402992 + total_bytes: 402992 +} + +2022-07-30 08:55:22,166 INFO MainThread:86199 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 402992 + total_bytes: 402992 +} +local_info { +} + +2022-07-30 08:55:23,802 INFO MainThread:86199 [wandb_run.py:_append_files():2180] logging synced 
files diff --git a/wandb/run-20220730_083825-1jwtqtqg/run-1jwtqtqg.wandb b/wandb/run-20220730_083825-1jwtqtqg/run-1jwtqtqg.wandb new file mode 100644 index 0000000000000000000000000000000000000000..5d38a5e7bc2416b6cb3dbeb0d80a9200b273f9a7 --- /dev/null +++ b/wandb/run-20220730_083825-1jwtqtqg/run-1jwtqtqg.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f517ba230a4b8d99820d7afad49dde911ae46c3cce8c7015ee6ce592b73a6777 +size 443514 diff --git a/wandb/run-20220730_093953-16dexcvn/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220730_093953-16dexcvn/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..d846a57f2375127bc169f9735151ad564083efac --- /dev/null +++ b/wandb/run-20220730_093953-16dexcvn/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1605 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220730_093953-16dexcvn/files/config.yaml b/wandb/run-20220730_093953-16dexcvn/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..98b5896181e96443fa663c1aa8366b535092204d --- /dev/null +++ b/wandb/run-20220730_093953-16dexcvn/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659173993 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220730_093953-16dexcvn/files/diff.patch b/wandb/run-20220730_093953-16dexcvn/files/diff.patch new file mode 100644 index 0000000000000000000000000000000000000000..39b46e1762f7e4b4f330f4ff487b597cc5496642 --- /dev/null +++ b/wandb/run-20220730_093953-16dexcvn/files/diff.patch @@ -0,0 +1,10 @@ +diff --git a/.gitattributes b/.gitattributes +index 755356a..f0eef4b 100644 +--- a/.gitattributes ++++ b/.gitattributes +@@ -29,3 +29,4 @@ 
saved_model/**/* filter=lfs diff=lfs merge=lfs -text + *.zip filter=lfs diff=lfs merge=lfs -text + *.zstandard filter=lfs diff=lfs merge=lfs -text + *tfevents* filter=lfs diff=lfs merge=lfs -text ++*.wandb filter=lfs diff=lfs merge=lfs -text +\ No newline at end of file diff --git a/wandb/run-20220730_093953-16dexcvn/files/output.log b/wandb/run-20220730_093953-16dexcvn/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..d8c67e7338fafb06ba08ce41c8407f7102009772 --- /dev/null +++ b/wandb/run-20220730_093953-16dexcvn/files/output.log @@ -0,0 +1,1614 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul30_09-39-49_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, 
+logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=12, +per_device_train_batch_size=12, +precision=full, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 78.32it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 469.02it/s] 
+WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + 
"contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at 
/home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_hid', 'kernel'), ('project_q', 'kernel'), ('project_hid', 'bias'), ('project_q', 'bias'), ('quantizer', 'codevectors'), ('quantizer', 'weight_proj', 'kernel'), ('quantizer', 'weight_proj', 'bias')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
+Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'kernel'), ('lm_head', 'bias')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #1: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 9005.53ex/s] +removing punctuation from train split #0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8305.21ex/s] +removing punctuation from train split #2: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8893.39ex/s] +removing punctuation from train split #4: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8962.96ex/s] +removing punctuation from train split #3: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8046.06ex/s] +removing punctuation 
from train split #5: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8535.17ex/s] +removing punctuation from train split #6: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8824.27ex/s] +removing punctuation from train split #8: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8296.15ex/s] +removing punctuation from train split #9: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8468.31ex/s] +removing punctuation from train split #10: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8493.52ex/s] +removing punctuation from train split #11: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8583.75ex/s] +removing punctuation from train split #12: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8299.35ex/s] +removing punctuation from train 
split #7: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 6715.33ex/s] +removing punctuation from train split #14: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8398.49ex/s] +removing punctuation from train split #13: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8081.26ex/s] +removing punctuation from train split #15: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8371.54ex/s] +removing punctuation from train split #16: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8235.09ex/s] +removing punctuation from train split #7: 88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8361/9523 [00:01<00:00, 6123.53ex/s] +removing punctuation from train split #7: 97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9215/9523 [00:01<00:00, 6776.16ex/s] +removing punctuation from train split #12: 
80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7654/9522 [00:00<00:00, 7914.30ex/s] +removing punctuation from train split #11: 92%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8772/9523 [00:01<00:00, 8762.24ex/s] +removing punctuation from train split #14: 72%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 6836/9522 [00:00<00:00, 8633.05ex/s] +removing punctuation from train split #12: 89%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8506/9522 [00:01<00:00, 8091.22ex/s] +removing punctuation from train split #12: 98%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 9346/9522 [00:01<00:00, 8180.56ex/s] +removing punctuation from train split #13: 87%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8282/9522 [00:01<00:00, 7900.65ex/s] +removing punctuation from train split #14: 90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8579/9522 [00:01<00:00, 8236.27ex/s] +removing punctuation from train split #13: 
96%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 9110/9522 [00:01<00:00, 8012.09ex/s] +removing punctuation from train split #14: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9456/9522 [00:01<00:00, 8390.83ex/s] +removing punctuation from train split #17: 70%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 6679/9522 [00:00<00:00, 8541.97ex/s] +removing punctuation from train split #16: 79%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 7538/9522 [00:00<00:00, 7832.63ex/s] +removing punctuation from train split #15: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9431/9522 [00:01<00:00, 8358.51ex/s] +removing punctuation from train split #16: 88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8394/9522 [00:01<00:00, 8042.70ex/s] +removing punctuation from train split #16: 97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 9270/9522 [00:01<00:00, 8252.38ex/s] +removing punctuation from train split #17: 
88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8404/9522 [00:01<00:00, 8140.35ex/s] +removing punctuation from train split #18: 90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 8586/9522 [00:01<00:00, 8490.22ex/s] +removing punctuation from train split #19: 80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 7629/9522 [00:00<00:00, 8297.75ex/s] +removing punctuation from train split #19: 89%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 8520/9522 [00:01<00:00, 8479.08ex/s] +removing punctuation from train split #21: 70%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 6656/9522 [00:00<00:00, 8263.56ex/s] +removing punctuation from train split #22: 71%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 6766/9522 [00:00<00:00, 8573.39ex/s] +removing punctuation from train split #23: 63%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 5994/9522 [00:00<00:00, 8650.52ex/s] +removing punctuation from train split #23: 72%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 6880/9522 [00:00<00:00, 8715.22ex/s] +removing punctuation from train split #25: 
54%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 5110/9522 [00:00<00:00, 7778.84ex/s] +removing punctuation from train split #26: 53%|████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 5086/9522 [00:00<00:00, 8627.94ex/s] +removing punctuation from train split #27: 44%|███████████████████████████████████████████████████████████████████████████████████████▏ | 4234/9522 [00:00<00:00, 8573.14ex/s] +removing punctuation from train split #28: 26%|██████████████████████████████████████████████████▊ | 2469/9522 [00:00<00:00, 8323.04ex/s] +removing punctuation from train split #29: 26%|███████████████████████████████████████████████████▍ | 2499/9522 [00:00<00:00, 8407.52ex/s] +removing punctuation from train split #30: 26%|██████████████████████████████████████████████████▍ | 2450/9522 [00:00<00:00, 8264.69ex/s] +removing punctuation from train split #25: 82%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 7791/9522 [00:00<00:00, 8535.43ex/s] +removing punctuation from train split #24: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍| 9496/9522 [00:01<00:00, 8276.56ex/s] +removing punctuation from train split #25: 92%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 8727/9522 [00:01<00:00, 8779.85ex/s] +removing punctuation from train split #26: 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8659/9522 [00:01<00:00, 
8862.78ex/s] +removing punctuation from train split #27: 82%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7855/9522 [00:00<00:00, 9026.60ex/s] +removing punctuation from train split #28: 75%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7169/9522 [00:00<00:00, 9404.61ex/s] +removing punctuation from train split #29: 74%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7024/9522 [00:00<00:00, 9096.91ex/s] +removing punctuation from train split #27: 92%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8797/9522 [00:01<00:00, 9144.61ex/s] +removing punctuation from train split #28: 86%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 8145/9522 [00:00<00:00, 9514.75ex/s] +removing punctuation from train split #28: 96%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9119/9522 [00:01<00:00, 9582.49ex/s] +removing punctuation from train split #29: 94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8941/9522 [00:01<00:00, 9354.41ex/s] +removing punctuation from train split #30: 
84%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8017/9522 [00:00<00:00, 9333.88ex/s] +removing punctuation from train split #30: 94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 8971/9522 [00:01<00:00, 9395.12ex/s] +removing punctuation from train split #31: 86%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8154/9522 [00:00<00:00, 9528.76ex/s] +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00022_of_00032.arrow9121/9522 [00:01<00:00, 9572.00ex/s] +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00023_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00024_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00025_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00026_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00027_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00028_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00031_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00000_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00001_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00002_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00003_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00004_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00005_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00006_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00007_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00008_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00009_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00010_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00011_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00012_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00013_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00014_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00015_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00016_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00017_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00018_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00019_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00020_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00021_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00022_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00023_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00024_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00025_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00026_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00027_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00028_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00031_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00000_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00001_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00002_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00003_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00004_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00005_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00006_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00007_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00008_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00009_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00010_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00011_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00012_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00013_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00014_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00015_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00016_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00017_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00018_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00019_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00020_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00021_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00022_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00023_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00024_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00025_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00026_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00027_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00028_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00031_of_00032.arrow +preprocess dataset #0: 1%|█▉ | 85/9497 [00:02<02:36, 60.05ex/s] +preprocess dataset #1: 1%|█▏ | 53/9497 [00:01<04:04, 38.56ex/s] +preprocess dataset #2: 0%| | 5/9497 [00:00<21:51, 7.24ex/s] +preprocess dataset #3: 1%|█▋ | 74/9497 [00:02<03:33, 44.18ex/s] +preprocess dataset #4: 1%|█▏ | 51/9497 [00:01<03:39, 42.96ex/s] +preprocess dataset #5: 0%|▏ | 7/9497 [00:00<15:45, 10.04ex/s] +preprocess dataset #6: 1%|█▎ | 55/9497 [00:01<03:30, 44.83ex/s] +preprocess dataset #7: 0%|▋ | 29/9497 [00:01<04:14, 37.17ex/s] +preprocess dataset #8: 1%|█▋ | 74/9497 [00:02<03:32, 44.29ex/s] 
+preprocess dataset #9: 0%|▋ | 32/9497 [00:01<04:46, 32.99ex/s] +preprocess dataset #10: 1%|█▋ | 72/9497 [00:02<04:24, 35.59ex/s] +preprocess dataset #11: 0%|▉ | 43/9496 [00:01<03:33, 44.37ex/s] +preprocess dataset #12: 0%| | 0/9496 [00:00 to the vocabulary +Adding to the vocabulary +2022-07-30 09:51:55.266267: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2022-07-30 09:51:55.266356: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303) +/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +WARNING:huggingface_hub.repository:/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +run_flax_speech_recognition_ctc.py:333: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) +INFO:__main__:***** Running training ***** +INFO:__main__: Num examples = 302693 +INFO:__main__: Num Epochs = 40 +INFO:__main__: Instantaneous batch size per device = 12 +INFO:__main__: Num gradient accumulation steps = 1 +INFO:__main__: Total train batch size (w. parallel & distributed) = 96 +INFO:__main__: Total optimization steps = 126120 +INFO:__main__: Gradient checkpointing: True +INFO:__main__: Use scan: False +INFO:__main__: Fuse matmuls: False +Epoch ... 
(1/40): 0%| | 0/40 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) + File "/data/flax/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 162, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2158, in cache_miss + out_tree, out_flat = f_pmapped_(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2034, in pmap_f + out = pxla.xla_pmap( + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2022, in bind + return map_bind(self, fun, *args, **params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2054, in map_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2025, in process + return trace.process_map(self, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 687, in process_call + return primitive.impl(f, *tracers, **params) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 829, in xla_pmap_impl + compiled_fun, fingerprint = parallel_callable( + File "/data/flax/lib/python3.8/site-packages/jax/linear_util.py", line 295, in memoized_fun + ans = call(fun, *args) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 860, in parallel_callable + pmap_executable = pmap_computation.compile() + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1137, in compile + self._executable = PmapExecutable.from_hlo(self._hlo, **self.compile_args) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1285, in from_hlo + compiled = dispatch.compile_or_get_cached( + File 
"/data/flax/lib/python3.8/site-packages/jax/_src/dispatch.py", line 899, in compile_or_get_cached + return backend_compile(backend, computation, compile_options, host_callbacks) + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/dispatch.py", line 843, in backend_compile + return backend.compile(built_c, compile_options=options) +jax._src.traceback_util.UnfilteredStackTrace: jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: XLA:TPU compile permanent error. Ran out of memory in memory space hbm. Used 16.18G of 15.48G hbm. Exceeded hbm capacity by 717.52M. +Total hbm usage >= 16.70G: + reserved 530.00M + program 5.38G + arguments 10.80G +Output size 10.76G; shares 10.76G with arguments. +Program hbm requirement 5.38G: + global 180.0K + scoped 72.08M + HLO temp 5.31G (99.0% utilization: Unpadded (5.02G) Padded (5.07G), 4.6% fragmentation (247.75M)) + Largest program allocations in hbm: + 1. Size: 750.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: f32[12,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 730.96M + Extra memory due to padding: 19.04M (1.0x expansion) + XLA label: fusion.194.remat6 = fusion(bitcast.8986, bitcast.8984, fusion.16271), kind=kOutput, calls=fused_computation.184.clone.clone.clone.clone.clone.clone + Allocation type: HLO temp + ========================== + 2. 
Size: 234.38M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/feed_forward/intermediate_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,5120]{2,1,0:T(8,128)} + Unpadded size: 234.14M + Extra memory due to padding: 240.0K (1.0x expansion) + XLA label: fusion.1385.remat = fusion(get-tuple-element.34607, fusion.6987.remat2.1, get-tuple-element.34610, get-tuple-element.34611, ...(+3)), kind=kOutput, calls=fused_computation.1219.clone + Allocation type: HLO temp + ========================== + 3. Size: 117.19M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (1,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[12,999,5120]{2,1,0:T(8,128)(2,1)} + Unpadded size: 117.07M + Extra memory due to padding: 120.0K (1.0x expansion) + XLA label: fusion.27201 = fusion(fusion.1385.remat, get-tuple-element.34582, get-tuple-element.34581, get-tuple-element.34616, ...(+1)), kind=kOutput, calls=fused_computation.20943 + Allocation type: HLO temp + ========================== + 4. 
Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/6/remat(core_fn)/6/layer_norm/add_any" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.7665 = fusion(get-tuple-element.9818, get-tuple-element.34155, get-tuple-element.11245, get-tuple-element.34154, ...(+3)), kind=kLoop, calls=fused_computation.7055 + Allocation type: HLO temp + ========================== + 5. Size: 58.59M + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: copy.5651 = copy(bitcast.15721) + Allocation type: HLO temp + ========================== + 6. Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/layer_norm/reduce_sum[axes=(2,)]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.22617 = fusion(fusion.7619, copy.5651, get-tuple-element.13649, fusion.10635, ...(+1)), kind=kLoop, calls=fused_computation.20747 + Allocation type: HLO temp + ========================== + 7. 
Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.22615 = fusion(get-tuple-element.10888, get-tuple-element.12891, get-tuple-element.12889, get-tuple-element.12893, ...(+5)), kind=kOutput, calls=fused_computation.20745 + Allocation type: HLO temp + ========================== + 8. Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/1/1/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.22613 = fusion(get-tuple-element.10892, get-tuple-element.12907, get-tuple-element.12905, get-tuple-element.12909, ...(+5)), kind=kOutput, calls=fused_computation.20743 + Allocation type: HLO temp + ========================== + 9. 
Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/2/2/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.22611 = fusion(get-tuple-element.10896, get-tuple-element.13083, get-tuple-element.13081, get-tuple-element.13085, ...(+5)), kind=kOutput, calls=fused_computation.20741 + Allocation type: HLO temp + ========================== + 10. Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/3/3/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.22609 = fusion(get-tuple-element.10900, get-tuple-element.13259, get-tuple-element.13257, get-tuple-element.13261, ...(+5)), kind=kOutput, calls=fused_computation.20739 + Allocation type: HLO temp + ========================== + 11. 
Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/4/4/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.22607 = fusion(get-tuple-element.10904, get-tuple-element.13435, get-tuple-element.13433, get-tuple-element.13437, ...(+5)), kind=kOutput, calls=fused_computation.20737 + Allocation type: HLO temp + ========================== + 12. Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/out_proj/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.6987.remat3 = fusion(get-tuple-element.34614, bitcast.1517, get-tuple-element.34601, bitcast.12393), kind=kOutput, calls=fused_computation.6377.clone.clone.clone + Allocation type: HLO temp + ========================== + 13. 
Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/out_proj/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.6987.remat2 = fusion(get-tuple-element.34614, bitcast.1517, get-tuple-element.34601, bitcast.12390), kind=kOutput, calls=fused_computation.6377.clone.clone + Allocation type: HLO temp + ========================== + 14. Size: 40.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/pos_conv_embed/conv/conv/rev[dimensions=(0,)]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=425 + Shape: bf16[128,1280,80]{2,1,0:T(8,128)(2,1)} + Unpadded size: 25.00M + Extra memory due to padding: 15.00M (1.6x expansion) + XLA label: reverse.37400 = reverse(bitcast.2126), dimensions={0} + Allocation type: HLO temp + ========================== + 15. Size: 33.75M + Shape: bf16[12,1127,16,80]{1,3,2,0:T(8,128)(2,1)} + Unpadded size: 33.02M + Extra memory due to padding: 750.0K (1.0x expansion) + XLA label: copy.3902.remat_compressed = copy(copy.3902) + Allocation type: HLO temp + ========================== + 16. 
Size: 31.22M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/feature_projection/layer_norm/add" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=150 + Shape: f32[12,999,512]{2,0,1:T(8,128)} + Unpadded size: 23.41M + Extra memory due to padding: 7.80M (1.3x expansion) + XLA label: fusion.9234.remat = fusion(get-tuple-element.13680, get-tuple-element.13681, copy.3899, copy.3897, ...(+2)), kind=kLoop, calls=fused_computation.8286.clone + Allocation type: HLO temp + ========================== + 17. Size: 30.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/v_proj/add" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=200 + Shape: bf16[12,999,1280]{1,2,0:T(8,128)(2,1)} + Unpadded size: 29.27M + Extra memory due to padding: 750.0K (1.0x expansion) + XLA label: copy.4655.remat2 = copy(fusion.6991.remat3) + Allocation type: HLO temp + ========================== + 18. Size: 30.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/div" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=93 + Shape: bf16[12,999,1280]{1,2,0:T(8,128)(2,1)} + Unpadded size: 29.27M + Extra memory due to padding: 750.0K (1.0x expansion) + XLA label: copy.4656.remat2 = copy(fusion.6990.remat2) + Allocation type: HLO temp + ========================== + 19. 
Size: 30.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/k_proj/add" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=200 + Shape: bf16[12,999,1280]{1,2,0:T(8,128)(2,1)} + Unpadded size: 29.27M + Extra memory due to padding: 750.0K (1.0x expansion) + XLA label: copy.4657.remat2 = copy(fusion.6988.remat3) + Allocation type: HLO temp + ========================== + 20. Size: 30.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/jit(_einsum)/dot_general[dimension_numbers=(((1,), (3,)), ((0, 2), (0, 1))) precision=(, ) preferred_element_type=None]" source_file="/data/wav2vec2-1b-npsc-nst/models/modeling_flax_wav2vec2.py" source_line=387 + Shape: bf16[12,16,80,999]{3,2,1,0:T(8,128)(2,1)} + Unpadded size: 29.27M + Extra memory due to padding: 750.0K (1.0x expansion) + XLA label: fusion.787 = fusion(bitcast.8988, fusion.754, get-tuple-element.11246, bitcast.8986, ...(+2)), kind=kOutput, calls=fused_computation.718 + Allocation type: HLO temp + ========================== +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1605, in + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) +jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: XLA:TPU compile permanent error. Ran out of memory in memory space hbm. Used 16.18G of 15.48G hbm. Exceeded hbm capacity by 717.52M. +Total hbm usage >= 16.70G: + reserved 530.00M + program 5.38G + arguments 10.80G +Output size 10.76G; shares 10.76G with arguments. 
+Program hbm requirement 5.38G: + global 180.0K + scoped 72.08M + HLO temp 5.31G (99.0% utilization: Unpadded (5.02G) Padded (5.07G), 4.6% fragmentation (247.75M)) + Largest program allocations in hbm: + 1. Size: 750.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: f32[12,16,999,999]{2,3,1,0:T(8,128)} + Unpadded size: 730.96M + Extra memory due to padding: 19.04M (1.0x expansion) + XLA label: fusion.194.remat6 = fusion(bitcast.8986, bitcast.8984, fusion.16271), kind=kOutput, calls=fused_computation.184.clone.clone.clone.clone.clone.clone + Allocation type: HLO temp + ========================== + 2. Size: 234.38M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/feed_forward/intermediate_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,5120]{2,1,0:T(8,128)} + Unpadded size: 234.14M + Extra memory due to padding: 240.0K (1.0x expansion) + XLA label: fusion.1385.remat = fusion(get-tuple-element.34607, fusion.6987.remat2.1, get-tuple-element.34610, get-tuple-element.34611, ...(+3)), kind=kOutput, calls=fused_computation.1219.clone + Allocation type: HLO temp + ========================== + 3. 
Size: 117.19M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (1,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[12,999,5120]{2,1,0:T(8,128)(2,1)} + Unpadded size: 117.07M + Extra memory due to padding: 120.0K (1.0x expansion) + XLA label: fusion.27201 = fusion(fusion.1385.remat, get-tuple-element.34582, get-tuple-element.34581, get-tuple-element.34616, ...(+1)), kind=kOutput, calls=fused_computation.20943 + Allocation type: HLO temp + ========================== + 4. Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/6/remat(core_fn)/6/layer_norm/add_any" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.7665 = fusion(get-tuple-element.9818, get-tuple-element.34155, get-tuple-element.11245, get-tuple-element.34154, ...(+3)), kind=kLoop, calls=fused_computation.7055 + Allocation type: HLO temp + ========================== + 5. Size: 58.59M + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: copy.5651 = copy(bitcast.15721) + Allocation type: HLO temp + ========================== + 6. 
Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/layer_norm/reduce_sum[axes=(2,)]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.22617 = fusion(fusion.7619, copy.5651, get-tuple-element.13649, fusion.10635, ...(+1)), kind=kLoop, calls=fused_computation.20747 + Allocation type: HLO temp + ========================== + 7. Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.22615 = fusion(get-tuple-element.10888, get-tuple-element.12891, get-tuple-element.12889, get-tuple-element.12893, ...(+5)), kind=kOutput, calls=fused_computation.20745 + Allocation type: HLO temp + ========================== + 8. Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/1/1/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.22613 = fusion(get-tuple-element.10892, get-tuple-element.12907, get-tuple-element.12905, get-tuple-element.12909, ...(+5)), kind=kOutput, calls=fused_computation.20743 + Allocation type: HLO temp + ========================== + 9. 
Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/2/2/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.22611 = fusion(get-tuple-element.10896, get-tuple-element.13083, get-tuple-element.13081, get-tuple-element.13085, ...(+5)), kind=kOutput, calls=fused_computation.20741 + Allocation type: HLO temp + ========================== + 10. Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/3/3/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.22609 = fusion(get-tuple-element.10900, get-tuple-element.13259, get-tuple-element.13257, get-tuple-element.13261, ...(+5)), kind=kOutput, calls=fused_computation.20739 + Allocation type: HLO temp + ========================== + 11. 
Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/4/4/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.22607 = fusion(get-tuple-element.10904, get-tuple-element.13435, get-tuple-element.13433, get-tuple-element.13437, ...(+5)), kind=kOutput, calls=fused_computation.20737 + Allocation type: HLO temp + ========================== + 12. Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/out_proj/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.6987.remat3 = fusion(get-tuple-element.34614, bitcast.1517, get-tuple-element.34601, bitcast.12393), kind=kOutput, calls=fused_computation.6377.clone.clone.clone + Allocation type: HLO temp + ========================== + 13. 
Size: 58.59M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/out_proj/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: f32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.6987.remat2 = fusion(get-tuple-element.34614, bitcast.1517, get-tuple-element.34601, bitcast.12390), kind=kOutput, calls=fused_computation.6377.clone.clone + Allocation type: HLO temp + ========================== + 14. Size: 40.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/pos_conv_embed/conv/conv/rev[dimensions=(0,)]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=425 + Shape: bf16[128,1280,80]{2,1,0:T(8,128)(2,1)} + Unpadded size: 25.00M + Extra memory due to padding: 15.00M (1.6x expansion) + XLA label: reverse.37400 = reverse(bitcast.2126), dimensions={0} + Allocation type: HLO temp + ========================== + 15. Size: 33.75M + Shape: bf16[12,1127,16,80]{1,3,2,0:T(8,128)(2,1)} + Unpadded size: 33.02M + Extra memory due to padding: 750.0K (1.0x expansion) + XLA label: copy.3902.remat_compressed = copy(copy.3902) + Allocation type: HLO temp + ========================== + 16. 
Size: 31.22M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/feature_projection/layer_norm/add" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=150 + Shape: f32[12,999,512]{2,0,1:T(8,128)} + Unpadded size: 23.41M + Extra memory due to padding: 7.80M (1.3x expansion) + XLA label: fusion.9234.remat = fusion(get-tuple-element.13680, get-tuple-element.13681, copy.3899, copy.3897, ...(+2)), kind=kLoop, calls=fused_computation.8286.clone + Allocation type: HLO temp + ========================== + 17. Size: 30.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/v_proj/add" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=200 + Shape: bf16[12,999,1280]{1,2,0:T(8,128)(2,1)} + Unpadded size: 29.27M + Extra memory due to padding: 750.0K (1.0x expansion) + XLA label: copy.4655.remat2 = copy(fusion.6991.remat3) + Allocation type: HLO temp + ========================== + 18. Size: 30.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/div" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=93 + Shape: bf16[12,999,1280]{1,2,0:T(8,128)(2,1)} + Unpadded size: 29.27M + Extra memory due to padding: 750.0K (1.0x expansion) + XLA label: copy.4656.remat2 = copy(fusion.6990.remat2) + Allocation type: HLO temp + ========================== + 19. 
Size: 30.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/k_proj/add" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=200 + Shape: bf16[12,999,1280]{1,2,0:T(8,128)(2,1)} + Unpadded size: 29.27M + Extra memory due to padding: 750.0K (1.0x expansion) + XLA label: copy.4657.remat2 = copy(fusion.6988.remat3) + Allocation type: HLO temp + ========================== + 20. Size: 30.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/5/remat(core_fn)/5/attention/jit(_einsum)/dot_general[dimension_numbers=(((1,), (3,)), ((0, 2), (0, 1))) precision=(, ) preferred_element_type=None]" source_file="/data/wav2vec2-1b-npsc-nst/models/modeling_flax_wav2vec2.py" source_line=387 + Shape: bf16[12,16,80,999]{3,2,1,0:T(8,128)(2,1)} + Unpadded size: 29.27M + Extra memory due to padding: 750.0K (1.0x expansion) + XLA label: fusion.787 = fusion(bitcast.8988, fusion.754, get-tuple-element.11246, bitcast.8986, ...(+2)), kind=kOutput, calls=fused_computation.718 + Allocation type: HLO temp + ========================== \ No newline at end of file diff --git a/wandb/run-20220730_093953-16dexcvn/files/requirements.txt b/wandb/run-20220730_093953-16dexcvn/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ef78cbdea431c3d66bc5af51443394cb7955eab --- /dev/null +++ b/wandb/run-20220730_093953-16dexcvn/files/requirements.txt @@ -0,0 +1,158 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 
+datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +exceptiongroup==1.0.0rc8 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +hypothesis==6.53.0 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +jiwer==2.3.0 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.4.0 +pygments==2.11.1 +pygtrie==2.5.0 +pyparsing==3.0.6 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 
+tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220730_093953-16dexcvn/files/wandb-metadata.json b/wandb/run-20220730_093953-16dexcvn/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..42f9a938eb6babc1a922aa94d71039686881e3ae --- /dev/null +++ b/wandb/run-20220730_093953-16dexcvn/files/wandb-metadata.json @@ -0,0 +1,66 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-30T09:39:57.124976", + "startedAt": "2022-07-30T09:39:53.726539", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=./", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=12", + "--per_device_eval_batch_size=12", + "--gradient_accumulation_steps=1", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + 
"--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220730_093953-16dexcvn/files/wandb-summary.json b/wandb/run-20220730_093953-16dexcvn/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..f6b533a778379f18be357d7655677b0f2090d573 --- /dev/null +++ b/wandb/run-20220730_093953-16dexcvn/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 979}} \ No newline at end of file diff --git a/wandb/run-20220730_093953-16dexcvn/logs/debug-internal.log b/wandb/run-20220730_093953-16dexcvn/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..2e5ce16b2c28d4249490f5b2ea7ff82b5d805307 --- /dev/null +++ b/wandb/run-20220730_093953-16dexcvn/logs/debug-internal.log @@ -0,0 +1,559 @@ +2022-07-30 09:39:54,635 INFO MainThread:3334444 [internal.py:wandb_internal():87] W&B internal server running at pid: 3334444, started at: 2022-07-30 09:39:54.635261 +2022-07-30 09:39:54,637 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: check_version +2022-07-30 09:39:54,638 INFO WriterThread:3334444 
[datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/run-16dexcvn.wandb +2022-07-30 09:39:54,638 DEBUG SenderThread:3334444 [sender.py:send():234] send: header +2022-07-30 09:39:54,638 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: check_version +2022-07-30 09:39:54,676 DEBUG SenderThread:3334444 [sender.py:send():234] send: run +2022-07-30 09:39:54,876 INFO SenderThread:3334444 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files +2022-07-30 09:39:54,876 INFO SenderThread:3334444 [sender.py:_start_run_threads():804] run started: 16dexcvn with start time 1659173993 +2022-07-30 09:39:54,876 DEBUG SenderThread:3334444 [sender.py:send():234] send: summary +2022-07-30 09:39:54,876 INFO SenderThread:3334444 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 09:39:54,876 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: run_start +2022-07-30 09:39:55,880 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/wandb-summary.json +2022-07-30 09:39:57,124 DEBUG HandlerThread:3334444 [meta.py:__init__():40] meta init +2022-07-30 09:39:57,124 DEBUG HandlerThread:3334444 [meta.py:__init__():54] meta init done +2022-07-30 09:39:57,124 DEBUG HandlerThread:3334444 [meta.py:probe():214] probe +2022-07-30 09:39:57,126 DEBUG HandlerThread:3334444 [meta.py:_setup_git():204] setup git +2022-07-30 09:39:57,165 DEBUG HandlerThread:3334444 [meta.py:_setup_git():211] setup git done +2022-07-30 09:39:57,165 DEBUG HandlerThread:3334444 [meta.py:_save_code():92] save code +2022-07-30 09:39:57,178 DEBUG HandlerThread:3334444 [meta.py:_save_code():113] save code done +2022-07-30 09:39:57,178 DEBUG HandlerThread:3334444 [meta.py:_save_patches():130] save patches +2022-07-30 09:39:57,253 DEBUG 
HandlerThread:3334444 [meta.py:_save_patches():172] save patches done +2022-07-30 09:39:57,253 DEBUG HandlerThread:3334444 [meta.py:_save_pip():58] save pip +2022-07-30 09:39:57,253 DEBUG HandlerThread:3334444 [meta.py:_save_pip():72] save pip done +2022-07-30 09:39:57,254 DEBUG HandlerThread:3334444 [meta.py:probe():252] probe done +2022-07-30 09:39:57,257 DEBUG SenderThread:3334444 [sender.py:send():234] send: files +2022-07-30 09:39:57,257 INFO SenderThread:3334444 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-30 09:39:57,257 INFO SenderThread:3334444 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-30 09:39:57,258 INFO SenderThread:3334444 [sender.py:_save_file():939] saving file diff.patch with policy now +2022-07-30 09:39:57,263 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:39:57,263 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:39:57,708 INFO Thread-11 :3334444 [upload_job.py:push():137] Uploaded file /tmp/tmppikonh_hwandb/1qaw72tn-wandb-metadata.json +2022-07-30 09:39:57,785 INFO Thread-13 :3334444 [upload_job.py:push():137] Uploaded file /tmp/tmppikonh_hwandb/2p365bv0-diff.patch +2022-07-30 09:39:57,882 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/code/run_flax_speech_recognition_ctc.py +2022-07-30 09:39:57,882 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:39:57,882 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/requirements.txt +2022-07-30 09:39:57,882 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_created():217] 
file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/diff.patch +2022-07-30 09:39:57,882 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/wandb-metadata.json +2022-07-30 09:39:57,882 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/code +2022-07-30 09:39:57,944 INFO Thread-12 :3334444 [upload_job.py:push():137] Uploaded file /tmp/tmppikonh_hwandb/29x3jl29-code/run_flax_speech_recognition_ctc.py +2022-07-30 09:39:59,883 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:40:01,884 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:40:03,885 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:40:05,886 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:40:11,889 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:40:12,396 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:40:12,396 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:40:13,890 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 
09:40:25,207 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:40:25,895 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:40:27,532 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:40:27,533 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:40:27,896 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:40:29,898 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:40:38,902 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:40:40,902 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:40:42,683 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:40:42,684 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:40:52,907 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:40:54,908 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:40:55,285 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:40:57,849 DEBUG HandlerThread:3334444 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:40:57,850 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:41:13,075 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:41:13,075 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:41:25,360 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:41:28,323 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:41:28,324 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:41:34,926 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:41:36,927 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:41:38,928 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:41:41,929 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:41:43,703 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:41:43,703 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:41:43,930 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:41:45,931 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:41:47,932 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:41:49,933 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:41:51,934 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:41:53,935 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:41:55,434 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:41:55,936 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:41:57,937 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:41:58,867 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:41:58,868 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:41:59,939 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:01,939 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:03,940 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:05,941 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:07,942 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:09,943 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:11,944 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:13,945 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:14,016 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:42:14,017 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:42:15,946 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:17,948 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:19,949 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:21,950 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 
09:42:23,951 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:25,506 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:42:25,952 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:27,953 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:29,150 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:42:29,151 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:42:29,954 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:31,955 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:33,956 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:35,957 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:37,958 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:39,959 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 
09:42:41,960 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:43,961 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:44,296 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:42:44,296 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:42:45,962 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:47,963 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:49,964 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:51,964 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:53,965 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:55,587 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:42:55,967 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:42:57,968 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 
09:42:59,479 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:42:59,479 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:42:59,969 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:01,970 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:03,970 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:05,971 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:07,972 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:09,973 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:11,974 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:13,976 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:14,639 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:43:14,640 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:43:15,977 INFO Thread-8 :3334444 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:17,977 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:19,978 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:21,980 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:23,980 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:25,670 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:43:25,983 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:27,982 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:29,983 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:30,043 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:43:30,043 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:43:31,985 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:33,986 INFO Thread-8 :3334444 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:35,987 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:37,988 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:39,989 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:41,990 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:43,991 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:45,194 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:43:45,195 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:43:45,992 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:47,994 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:49,995 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:51,996 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:53,997 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:55,759 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:43:55,999 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:43:58,000 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:00,001 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:00,353 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:44:00,353 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:44:02,003 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:04,004 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:06,005 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:08,006 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:10,007 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:12,008 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:14,009 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:15,500 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:44:15,500 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:44:16,010 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:18,011 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:21,013 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:23,014 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:25,015 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:25,837 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:44:27,016 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:29,017 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:30,637 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:44:30,638 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:44:31,018 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:33,019 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:35,020 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:37,021 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:39,022 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:41,023 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:43,026 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:45,025 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:45,789 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:44:45,789 DEBUG SenderThread:3334444 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 09:44:47,033 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:49,034 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:51,035 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:53,036 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:55,038 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:55,924 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:44:57,040 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:44:59,040 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:00,954 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:45:00,954 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:45:01,042 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:03,046 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:05,047 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:07,048 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:09,049 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:11,050 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:13,051 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:15,052 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:16,107 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:45:16,108 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:45:17,054 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:19,055 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:21,056 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 
09:45:23,057 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:25,058 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:26,013 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:45:27,060 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:29,061 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:31,062 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:31,261 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:45:31,261 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:45:33,063 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:35,064 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:37,065 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:39,067 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 
09:45:41,068 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:43,069 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:45,070 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:46,405 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:45:46,405 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:45:47,071 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:49,072 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:51,073 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:53,074 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:55,075 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:45:56,107 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:45:57,076 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 
09:45:59,077 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:01,078 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:01,561 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:46:01,562 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:46:03,079 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:05,080 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:07,081 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:09,082 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:11,083 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:13,084 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:15,085 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:16,711 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 09:46:16,712 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:46:17,086 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:19,087 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:21,088 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:23,088 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:25,089 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:26,199 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:46:27,090 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:29,091 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:31,092 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:31,856 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:46:31,857 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:46:33,093 INFO Thread-8 :3334444 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:35,094 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:37,095 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:46:47,022 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:46:47,022 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:46:56,275 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:47:02,166 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:47:02,166 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:47:05,106 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:47:07,107 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:47:09,108 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:47:11,109 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:47:17,304 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:47:17,305 DEBUG SenderThread:3334444 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 09:47:26,356 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:47:32,438 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:47:32,438 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:47:47,590 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:47:47,590 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:47:50,128 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:47:52,129 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:47:54,129 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:47:56,130 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:47:56,431 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:47:58,131 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:00,132 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:02,133 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:02,743 DEBUG 
HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:48:02,743 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:48:04,134 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:06,135 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:08,136 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:10,137 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:12,138 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:14,139 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:16,140 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:17,880 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:48:17,880 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:48:18,141 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:20,142 INFO Thread-8 :3334444 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:26,144 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:26,508 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:48:28,145 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:30,147 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:32,148 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:33,017 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:48:33,017 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:48:34,149 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:48:48,152 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:48:48,152 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:48:56,583 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:49:03,290 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:49:03,291 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:49:14,166 INFO Thread-8 :3334444 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:16,167 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:18,167 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:18,437 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:49:18,437 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:49:20,168 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:22,169 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:24,170 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:26,171 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:26,659 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:49:28,172 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:30,173 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:32,174 INFO Thread-8 :3334444 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:33,581 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:49:33,582 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:49:34,175 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:36,175 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:38,176 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:40,177 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:42,179 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:46,180 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:48,181 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:48,718 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:49:48,719 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:49:50,182 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:52,183 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:54,184 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:56,185 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:49:56,735 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:49:58,186 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:00,187 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:02,188 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:03,860 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:50:03,860 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:50:04,189 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:06,190 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:08,191 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:10,192 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:12,193 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:14,194 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:16,195 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:18,196 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:19,030 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:50:19,030 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:50:20,197 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:22,198 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:24,200 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:26,200 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log 
+2022-07-30 09:50:26,820 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:50:28,201 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:30,202 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:32,203 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:34,181 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:50:34,182 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:50:34,204 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:50:49,313 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:50:49,314 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:50:56,900 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:51:04,449 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:51:04,450 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:51:17,224 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:51:19,623 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:51:19,623 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: 
stop_status +2022-07-30 09:51:23,226 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:51:25,227 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:51:26,967 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:51:32,230 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:51:34,923 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:51:34,923 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:51:40,233 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:51:47,237 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:51:50,342 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:51:50,342 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:51:55,240 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:51:57,042 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:51:57,241 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:52:01,243 INFO Thread-8 :3334444 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:52:05,245 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:52:05,856 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:52:05,856 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:52:07,246 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:52:13,249 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:52:21,214 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:52:21,214 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:52:27,116 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:52:36,379 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:52:36,380 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:52:42,261 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:52:51,264 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:52:51,562 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:52:51,562 DEBUG SenderThread:3334444 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 09:52:53,265 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:52:57,191 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:53:06,734 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:53:06,734 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:53:21,882 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:53:21,882 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:53:27,266 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:53:37,016 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:53:37,017 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:53:52,149 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:53:52,149 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:53:57,341 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:54:07,285 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:54:07,286 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:54:22,442 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:54:22,442 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:54:27,412 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:54:37,580 DEBUG HandlerThread:3334444 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:54:37,581 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:54:52,720 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:54:52,720 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:54:57,486 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:55:07,859 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:55:07,860 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:55:23,001 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:55:23,001 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:55:27,564 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:55:38,143 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:55:38,144 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:55:53,278 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:55:53,278 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:55:57,809 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:56:08,417 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 09:56:08,418 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: stop_status +2022-07-30 09:56:13,380 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:56:14,385 DEBUG 
HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:14,385 DEBUG SenderThread:3334444 [sender.py:send():234] send: telemetry +2022-07-30 09:56:14,386 DEBUG SenderThread:3334444 [sender.py:send():234] send: exit +2022-07-30 09:56:14,386 INFO SenderThread:3334444 [sender.py:send_exit():366] handling exit code: 1 +2022-07-30 09:56:14,388 INFO SenderThread:3334444 [sender.py:send_exit():368] handling runtime: 979 +2022-07-30 09:56:14,388 INFO SenderThread:3334444 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 09:56:14,389 INFO SenderThread:3334444 [sender.py:send_exit():374] send defer +2022-07-30 09:56:14,389 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 09:56:14,390 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: defer +2022-07-30 09:56:14,390 INFO HandlerThread:3334444 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-30 09:56:14,390 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: defer +2022-07-30 09:56:14,390 INFO SenderThread:3334444 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-30 09:56:14,390 INFO SenderThread:3334444 [sender.py:transition_state():387] send defer: 1 +2022-07-30 09:56:14,390 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: defer +2022-07-30 09:56:14,391 INFO HandlerThread:3334444 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-30 09:56:14,451 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: defer +2022-07-30 09:56:14,451 INFO SenderThread:3334444 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-30 09:56:14,451 INFO SenderThread:3334444 [sender.py:transition_state():387] send defer: 2 +2022-07-30 09:56:14,452 DEBUG SenderThread:3334444 [sender.py:send():234] send: stats +2022-07-30 09:56:14,452 DEBUG HandlerThread:3334444 
[handler.py:handle_request():130] handle_request: defer +2022-07-30 09:56:14,452 INFO HandlerThread:3334444 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-30 09:56:14,453 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: defer +2022-07-30 09:56:14,453 INFO SenderThread:3334444 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-30 09:56:14,453 INFO SenderThread:3334444 [sender.py:transition_state():387] send defer: 3 +2022-07-30 09:56:14,453 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: defer +2022-07-30 09:56:14,453 INFO HandlerThread:3334444 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-30 09:56:14,453 DEBUG SenderThread:3334444 [sender.py:send():234] send: summary +2022-07-30 09:56:14,454 INFO SenderThread:3334444 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 09:56:14,454 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: defer +2022-07-30 09:56:14,454 INFO SenderThread:3334444 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-30 09:56:14,454 INFO SenderThread:3334444 [sender.py:transition_state():387] send defer: 4 +2022-07-30 09:56:14,454 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: defer +2022-07-30 09:56:14,454 INFO HandlerThread:3334444 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-30 09:56:14,454 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: defer +2022-07-30 09:56:14,454 INFO SenderThread:3334444 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-30 09:56:14,492 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:14,617 INFO SenderThread:3334444 [sender.py:transition_state():387] send defer: 5 +2022-07-30 09:56:14,618 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 
09:56:14,618 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: defer +2022-07-30 09:56:14,618 INFO HandlerThread:3334444 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-30 09:56:14,618 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: defer +2022-07-30 09:56:14,618 INFO SenderThread:3334444 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-30 09:56:14,619 INFO SenderThread:3334444 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-30 09:56:14,719 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:15,381 INFO Thread-8 :3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:56:15,381 INFO SenderThread:3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/wandb-summary.json +2022-07-30 09:56:15,381 INFO SenderThread:3334444 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/config.yaml +2022-07-30 09:56:15,382 INFO SenderThread:3334444 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files +2022-07-30 09:56:15,382 INFO SenderThread:3334444 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/config.yaml config.yaml +2022-07-30 09:56:15,382 INFO SenderThread:3334444 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/diff.patch diff.patch +2022-07-30 09:56:15,382 INFO SenderThread:3334444 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/requirements.txt requirements.txt +2022-07-30 09:56:15,382 INFO SenderThread:3334444 
[dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log output.log +2022-07-30 09:56:15,385 INFO SenderThread:3334444 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/wandb-summary.json wandb-summary.json +2022-07-30 09:56:15,386 INFO SenderThread:3334444 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/wandb-metadata.json wandb-metadata.json +2022-07-30 09:56:15,389 INFO SenderThread:3334444 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-30 09:56:15,389 INFO SenderThread:3334444 [sender.py:transition_state():387] send defer: 6 +2022-07-30 09:56:15,389 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 09:56:15,395 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: defer +2022-07-30 09:56:15,395 INFO HandlerThread:3334444 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-30 09:56:15,395 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: defer +2022-07-30 09:56:15,395 INFO SenderThread:3334444 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-30 09:56:15,395 INFO SenderThread:3334444 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 09:56:15,493 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:15,493 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 09:56:15,595 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:15,595 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 09:56:15,696 DEBUG 
HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:15,696 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 09:56:15,798 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:15,798 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 09:56:15,860 INFO Thread-15 :3334444 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/requirements.txt +2022-07-30 09:56:15,860 INFO Thread-14 :3334444 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/config.yaml +2022-07-30 09:56:15,864 INFO Thread-17 :3334444 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/wandb-summary.json +2022-07-30 09:56:15,899 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:15,899 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 09:56:16,001 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:16,001 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 09:56:16,102 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:16,103 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 09:56:16,139 INFO Thread-16 :3334444 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/files/output.log +2022-07-30 09:56:16,204 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:16,204 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 
09:56:16,305 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:16,306 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 09:56:16,339 INFO Thread-7 :3334444 [sender.py:transition_state():387] send defer: 7 +2022-07-30 09:56:16,340 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: defer +2022-07-30 09:56:16,340 INFO HandlerThread:3334444 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-30 09:56:16,340 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: defer +2022-07-30 09:56:16,340 INFO SenderThread:3334444 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-30 09:56:16,407 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:16,780 INFO SenderThread:3334444 [sender.py:transition_state():387] send defer: 8 +2022-07-30 09:56:16,780 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 09:56:16,781 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: defer +2022-07-30 09:56:16,781 INFO HandlerThread:3334444 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-30 09:56:16,781 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: defer +2022-07-30 09:56:16,781 INFO SenderThread:3334444 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-30 09:56:16,781 INFO SenderThread:3334444 [sender.py:transition_state():387] send defer: 9 +2022-07-30 09:56:16,782 DEBUG SenderThread:3334444 [sender.py:send():234] send: final +2022-07-30 09:56:16,782 DEBUG SenderThread:3334444 [sender.py:send():234] send: footer +2022-07-30 09:56:16,782 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: defer +2022-07-30 09:56:16,782 INFO HandlerThread:3334444 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-30 
09:56:16,782 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: defer +2022-07-30 09:56:16,782 INFO SenderThread:3334444 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-30 09:56:16,882 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 09:56:16,882 DEBUG SenderThread:3334444 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 09:56:16,882 INFO SenderThread:3334444 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 09:56:17,140 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: get_summary +2022-07-30 09:56:17,141 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-30 09:56:17,141 DEBUG HandlerThread:3334444 [handler.py:handle_request():130] handle_request: shutdown +2022-07-30 09:56:17,141 INFO HandlerThread:3334444 [handler.py:finish():731] shutting down handler +2022-07-30 09:56:17,782 INFO WriterThread:3334444 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/run-16dexcvn.wandb +2022-07-30 09:56:18,139 INFO SenderThread:3334444 [sender.py:finish():1070] shutting down sender +2022-07-30 09:56:18,139 INFO SenderThread:3334444 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 09:56:18,139 INFO SenderThread:3334444 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 09:56:18,142 INFO MainThread:3334444 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220730_093953-16dexcvn/logs/debug.log b/wandb/run-20220730_093953-16dexcvn/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..84df8b2c6657d2d40292d619d99e980e38dd7d14 --- /dev/null +++ b/wandb/run-20220730_093953-16dexcvn/logs/debug.log @@ -0,0 +1,157 @@ +2022-07-30 09:39:53,728 INFO MainThread:3333166 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} 
+2022-07-30 09:39:53,728 INFO MainThread:3333166 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-30 09:39:53,728 INFO MainThread:3333166 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/logs/debug.log +2022-07-30 09:39:53,728 INFO MainThread:3333166 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_093953-16dexcvn/logs/debug-internal.log +2022-07-30 09:39:53,728 INFO MainThread:3333166 [wandb_init.py:init():404] calling init triggers +2022-07-30 09:39:53,728 INFO MainThread:3333166 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-30 09:39:53,728 INFO MainThread:3333166 [wandb_init.py:init():460] starting backend +2022-07-30 09:39:53,728 INFO MainThread:3333166 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-30 09:39:53,775 INFO MainThread:3333166 [backend.py:ensure_launched():216] starting backend process... +2022-07-30 09:39:53,821 INFO MainThread:3333166 [backend.py:ensure_launched():221] started backend process with pid: 3334444 +2022-07-30 09:39:53,823 INFO MainThread:3333166 [wandb_init.py:init():469] backend started and connected +2022-07-30 09:39:53,837 INFO MainThread:3333166 [wandb_init.py:init():533] updated telemetry +2022-07-30 09:39:53,952 INFO MainThread:3333166 [wandb_init.py:init():563] communicating current version +2022-07-30 09:39:54,674 INFO MainThread:3333166 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! 
To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-30 09:39:54,674 INFO MainThread:3333166 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-30 09:39:54,876 INFO MainThread:3333166 [wandb_init.py:init():606] starting run threads in backend +2022-07-30 09:39:57,262 INFO MainThread:3333166 [wandb_run.py:_console_start():1810] atexit reg +2022-07-30 09:39:57,262 INFO MainThread:3333166 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-30 09:39:57,263 INFO MainThread:3333166 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-30 09:39:57,265 INFO MainThread:3333166 [wandb_run.py:_redirect():1745] Redirects installed. +2022-07-30 09:39:57,265 INFO MainThread:3333166 [wandb_init.py:init():633] run started, returning control to user process +2022-07-30 09:56:12,210 INFO MainThread:3333166 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-30 09:56:12,216 INFO MainThread:3333166 [wandb_run.py:_restore():1752] restore +2022-07-30 09:56:14,390 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 74317 +} + +2022-07-30 09:56:14,618 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 74317 +} + +2022-07-30 09:56:15,392 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 5 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 375734 +} + +2022-07-30 09:56:15,494 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74317 + total_bytes: 375762 +} + +2022-07-30 09:56:15,595 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got 
exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 375762 + total_bytes: 375762 +} + +2022-07-30 09:56:15,697 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 375762 + total_bytes: 375762 +} + +2022-07-30 09:56:15,798 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 375762 + total_bytes: 375762 +} + +2022-07-30 09:56:15,900 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 375762 + total_bytes: 375762 +} + +2022-07-30 09:56:16,002 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 375762 + total_bytes: 375762 +} + +2022-07-30 09:56:16,103 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 375762 + total_bytes: 375762 +} + +2022-07-30 09:56:16,205 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 375762 + total_bytes: 375762 +} + +2022-07-30 09:56:16,306 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 375762 + total_bytes: 375762 +} + +2022-07-30 09:56:16,781 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 375762 + total_bytes: 375762 +} + +2022-07-30 09:56:17,139 INFO MainThread:3333166 [wandb_run.py:_wait_for_finish():1912] got exit ret: 
done: true +exit_result { +} +file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 375762 + total_bytes: 375762 +} +local_info { +} + +2022-07-30 09:56:18,716 INFO MainThread:3333166 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220730_093953-16dexcvn/run-16dexcvn.wandb b/wandb/run-20220730_093953-16dexcvn/run-16dexcvn.wandb new file mode 100644 index 0000000000000000000000000000000000000000..d52e071e7cfc7360c4146cb58590c19398708948 --- /dev/null +++ b/wandb/run-20220730_093953-16dexcvn/run-16dexcvn.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea59404e0e117ed431b8d34e151d0ec69f1ba66330a1ea2ea4d8a2bdad59c8ff +size 418608 diff --git a/wandb/run-20220730_111754-bhdpxdi4/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220730_111754-bhdpxdi4/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..d846a57f2375127bc169f9735151ad564083efac --- /dev/null +++ b/wandb/run-20220730_111754-bhdpxdi4/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1605 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. 
Pointers for this are left as comments. + +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220730_111754-bhdpxdi4/files/config.yaml b/wandb/run-20220730_111754-bhdpxdi4/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cc145b9e10df828754a63ef17390f3f5ec5573f7 --- /dev/null +++ b/wandb/run-20220730_111754-bhdpxdi4/files/config.yaml @@ -0,0 +1,27 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659179874 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220730_111754-bhdpxdi4/files/diff.patch b/wandb/run-20220730_111754-bhdpxdi4/files/diff.patch new file mode 100644 index 0000000000000000000000000000000000000000..39b46e1762f7e4b4f330f4ff487b597cc5496642 --- /dev/null +++ b/wandb/run-20220730_111754-bhdpxdi4/files/diff.patch @@ -0,0 +1,10 @@ +diff --git a/.gitattributes b/.gitattributes +index 755356a..f0eef4b 100644 +--- a/.gitattributes ++++ b/.gitattributes +@@ -29,3 +29,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs 
-text + *.zip filter=lfs diff=lfs merge=lfs -text + *.zstandard filter=lfs diff=lfs merge=lfs -text + *tfevents* filter=lfs diff=lfs merge=lfs -text ++*.wandb filter=lfs diff=lfs merge=lfs -text +\ No newline at end of file diff --git a/wandb/run-20220730_111754-bhdpxdi4/files/output.log b/wandb/run-20220730_111754-bhdpxdi4/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..5416b7792aa2700437aa8a0b76949f8e9e74e2c6 --- /dev/null +++ b/wandb/run-20220730_111754-bhdpxdi4/files/output.log @@ -0,0 +1,736 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul30_11-17-50_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, +logging_strategy=steps, 
+lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=24, +per_device_train_batch_size=24, +precision=full_mixed, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 48.08it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 445.81it/s] +WARNING:datasets.arrow_dataset:Loading 
cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + 
"contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at 
/home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('quantizer', 'weight_proj', 'bias'), ('project_hid', 'bias'), ('project_q', 'kernel'), ('project_q', 'bias'), ('quantizer', 'weight_proj', 'kernel'), ('quantizer', 'codevectors'), ('project_hid', 'kernel')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
+Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'kernel'), ('lm_head', 'bias')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8750.90ex/s] +removing punctuation from train split #2: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8172.52ex/s] +removing punctuation from train split #4: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8842.39ex/s] +removing punctuation from train split #1: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7248.79ex/s] +removing punctuation from train split #5: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8055.10ex/s] +removing punctuation 
from train split #3: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7504.68ex/s] +removing punctuation from train split #6: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7551.02ex/s] +removing punctuation from train split #8: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8173.02ex/s] +removing punctuation from train split #7: 66%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 6262/9523 [00:00<00:00, 7376.88ex/s] +removing punctuation from train split #5: 95%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 9056/9523 [00:01<00:00, 8176.29ex/s] +removing punctuation from train split #6: 86%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8222/9523 [00:01<00:00, 7796.95ex/s] +removing punctuation from train split #8: 79%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 7495/9523 [00:00<00:00, 8188.42ex/s] +removing punctuation from train split #6: 
95%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 9068/9523 [00:01<00:00, 7989.38ex/s] +removing punctuation from train split #7: 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8440/9523 [00:01<00:00, 6953.64ex/s] +removing punctuation from train split #8: 97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9191/9523 [00:01<00:00, 8334.11ex/s] +removing punctuation from train split #10: 83%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 7893/9523 [00:00<00:00, 9072.25ex/s] +removing punctuation from train split #10: 93%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 8860/9523 [00:01<00:00, 9254.14ex/s] +removing punctuation from train split #9: 95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 9030/9523 [00:01<00:00, 8244.93ex/s] +removing punctuation from train split #15: 42%|██████████████████████████████████████████████████████████████████████████████████▍ | 4004/9522 [00:00<00:00, 8124.94ex/s] +removing punctuation from train split #11: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8200/9523 [00:01<00:00, 8162.87ex/s] 
+removing punctuation from train split #12: 78%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7420/9522 [00:00<00:00, 8095.21ex/s] +removing punctuation from train split #15: 60%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 5667/9522 [00:00<00:00, 8223.08ex/s] +removing punctuation from train split #16: 59%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 5595/9522 [00:00<00:00, 8108.71ex/s] +removing punctuation from train split #17: 50%|██████████████████████████████████████████████████████████████████████████████████████████████████▋ | 4795/9522 [00:00<00:00, 8092.70ex/s] +removing punctuation from train split #18: 40%|███████████████████████████████████████████████████████████████████████████████ | 3844/9522 [00:00<00:00, 6126.31ex/s] +removing punctuation from train split #17: 59%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 5635/9522 [00:00<00:00, 8191.52ex/s] +removing punctuation from train split #20: 34%|██████████████████████████████████████████████████████████████████▊ | 3245/9522 [00:00<00:00, 8190.98ex/s] +removing punctuation from train split #21: 34%|█████████████████████████████████████████████████████████████████▋ | 3193/9522 [00:00<00:00, 8091.29ex/s] +removing punctuation from train split #20: 43%|███████████████████████████████████████████████████████████████████████████████████▉ | 4080/9522 [00:00<00:00, 8246.49ex/s] +removing punctuation from train split #23: 17%|█████████████████████████████████▏ | 1613/9522 [00:00<00:00, 8106.45ex/s] +removing punctuation from train split #22: 34%|███████████████████████████████████████████████████████████████████▏ | 3266/9522 [00:00<00:00, 8245.94ex/s] +removing punctuation from train 
split #23: 26%|██████████████████████████████████████████████████▋ | 2460/9522 [00:00<00:00, 8268.25ex/s] +removing punctuation from train split #26: 8%|███████████████▌ | 755/9522 [00:00<00:01, 7549.31ex/s] +removing punctuation from train split #20: 69%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 6571/9522 [00:00<00:00, 7735.32ex/s] +removing punctuation from train split #18: 79%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7562/9522 [00:01<00:00, 7402.62ex/s] +removing punctuation from train split #20: 78%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 7440/9522 [00:00<00:00, 8018.52ex/s] +removing punctuation from train split #18: 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 8471/9522 [00:01<00:00, 7895.82ex/s] +removing punctuation from train split #18: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 9426/9522 [00:01<00:00, 8382.41ex/s] +removing punctuation from train split #20: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8335/9522 [00:01<00:00, 8296.36ex/s] +removing punctuation from train split #20: 97%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 9281/9522 [00:01<00:00, 
8643.22ex/s] +removing punctuation from train split #22: 78%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 7462/9522 [00:00<00:00, 7997.95ex/s] +removing punctuation from train split #22: 88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8377/9522 [00:01<00:00, 8340.55ex/s] +removing punctuation from train split #21: 97%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 9196/9522 [00:01<00:00, 6344.71ex/s] +removing punctuation from train split #22: 97%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9217/9522 [00:01<00:00, 7164.89ex/s] +removing punctuation from train split #23: 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8472/9522 [00:01<00:00, 7128.14ex/s] +removing punctuation from train split #24: 79%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 7478/9522 [00:01<00:00, 6169.29ex/s] +removing punctuation from train split #23: 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 9404/9522 [00:01<00:00, 7711.35ex/s] +removing punctuation from train split #24: 
88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 8390/9522 [00:01<00:00, 6894.92ex/s] +removing punctuation from train split #24: 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9311/9522 [00:01<00:00, 7495.91ex/s] +removing punctuation from train split #25: 93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8820/9522 [00:01<00:00, 8213.57ex/s] +removing punctuation from train split #26: 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 9392/9522 [00:01<00:00, 8332.47ex/s] +removing punctuation from train split #27: 90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8543/9522 [00:01<00:00, 8777.93ex/s] +removing punctuation from train split #28: 71%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 6775/9522 [00:00<00:00, 8187.29ex/s] +removing punctuation from train split #29: 55%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 5277/9522 [00:00<00:00, 7516.57ex/s] +removing punctuation from train split #27: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋| 9506/9522 
[00:01<00:00, 9030.56ex/s] +removing punctuation from train split #29: 66%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 6242/9522 [00:00<00:00, 8130.78ex/s] +removing punctuation from train split #28: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌| 9501/9522 [00:01<00:00, 8573.55ex/s] +removing punctuation from train split #29: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8184/9522 [00:00<00:00, 8922.41ex/s] +removing punctuation from train split #29: 96%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 9148/9522 [00:01<00:00, 9134.35ex/s] +removing punctuation from train split #31: 67%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 6397/9522 [00:00<00:00, 8283.49ex/s] +removing punctuation from train split #30: 97%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 9204/9522 [00:01<00:00, 8400.18ex/s] +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00029_of_00032.arrow9285/9522 [00:01<00:00, 9150.66ex/s] +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00031_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00000_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00001_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00002_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00003_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00004_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00005_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00006_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00007_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00008_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00009_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00010_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00011_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00012_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00013_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00014_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00015_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00016_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00017_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00018_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00019_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00020_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00021_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00022_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00023_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00024_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00025_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00026_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00027_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00028_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00031_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00000_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00001_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00002_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00003_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00004_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00005_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00006_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00007_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00008_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00009_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00010_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00011_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00012_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00013_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00014_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00015_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00016_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00017_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00018_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00019_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00020_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00021_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00022_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00023_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00024_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00025_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00026_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00027_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00028_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00031_of_00032.arrow +preprocess dataset #0: 0%|▏ | 10/9497 [00:01<10:51, 14.55ex/s] +preprocess dataset #1: 0%| | 0/9497 [00:00=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. + """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` 
(necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." + }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. 
Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. + """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. 
Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. 
" + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. 
" + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. " + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220730_113845-2hglxdx5/files/config.yaml b/wandb/run-20220730_113845-2hglxdx5/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4466091c28ac9a503d2d5308e13c26a93862e56e --- /dev/null +++ b/wandb/run-20220730_113845-2hglxdx5/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659181126 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220730_113845-2hglxdx5/files/diff.patch b/wandb/run-20220730_113845-2hglxdx5/files/diff.patch new file mode 100644 index 0000000000000000000000000000000000000000..39b46e1762f7e4b4f330f4ff487b597cc5496642 --- /dev/null +++ b/wandb/run-20220730_113845-2hglxdx5/files/diff.patch @@ -0,0 +1,10 @@ +diff --git a/.gitattributes b/.gitattributes +index 755356a..f0eef4b 100644 +--- a/.gitattributes ++++ b/.gitattributes +@@ -29,3 +29,4 @@ 
saved_model/**/* filter=lfs diff=lfs merge=lfs -text + *.zip filter=lfs diff=lfs merge=lfs -text + *.zstandard filter=lfs diff=lfs merge=lfs -text + *tfevents* filter=lfs diff=lfs merge=lfs -text ++*.wandb filter=lfs diff=lfs merge=lfs -text +\ No newline at end of file diff --git a/wandb/run-20220730_113845-2hglxdx5/files/output.log b/wandb/run-20220730_113845-2hglxdx5/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..2073ab99f98da47dd8fec496d9d1dd48b339d758 --- /dev/null +++ b/wandb/run-20220730_113845-2hglxdx5/files/output.log @@ -0,0 +1,1628 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul30_11-38-41_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, 
+logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=24, +per_device_train_batch_size=24, +precision=full_mixed, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 79.80it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 
457.11it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + 
"contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at 
/home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_hid', 'bias'), ('project_q', 'bias'), ('project_q', 'kernel'), ('quantizer', 'weight_proj', 'bias'), ('quantizer', 'weight_proj', 'kernel'), ('quantizer', 'codevectors'), ('project_hid', 'kernel')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
+Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'bias'), ('lm_head', 'kernel')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 5%|██████████▋ | 514/9523 [00:00<00:01, 5134.30ex/s] +removing punctuation from train split #1: 0%| | 0/9523 [00:00 to the vocabulary +Adding to the vocabulary +2022-07-30 11:50:54.715424: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2022-07-30 11:50:54.715480: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303) +/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +WARNING:huggingface_hub.repository:/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +run_flax_speech_recognition_ctc.py:337: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) +INFO:__main__:***** Running training ***** +INFO:__main__: Num examples = 302693 +INFO:__main__: Num Epochs = 40 +INFO:__main__: Instantaneous batch size per device = 24 +INFO:__main__: Num gradient accumulation steps = 1 +INFO:__main__: Total train batch size (w. 
parallel & distributed) = 192 +INFO:__main__: Total optimization steps = 63040 +INFO:__main__: Gradient checkpointing: True +INFO:__main__: Use scan: False +INFO:__main__: Fuse matmuls: False +Epoch ... (1/40): 0%| | 0/40 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) + File "/data/flax/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 162, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2158, in cache_miss + out_tree, out_flat = f_pmapped_(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2034, in pmap_f + out = pxla.xla_pmap( + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2022, in bind + return map_bind(self, fun, *args, **params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2054, in map_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2025, in process + return trace.process_map(self, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 687, in process_call + return primitive.impl(f, *tracers, **params) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 829, in xla_pmap_impl + compiled_fun, fingerprint = parallel_callable( + File "/data/flax/lib/python3.8/site-packages/jax/linear_util.py", line 295, in memoized_fun + ans = call(fun, *args) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 860, in parallel_callable + pmap_executable = pmap_computation.compile() + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1137, in compile + self._executable = PmapExecutable.from_hlo(self._hlo, 
**self.compile_args) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1285, in from_hlo + compiled = dispatch.compile_or_get_cached( + File "/data/flax/lib/python3.8/site-packages/jax/_src/dispatch.py", line 899, in compile_or_get_cached + return backend_compile(backend, computation, compile_options, host_callbacks) + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/dispatch.py", line 843, in backend_compile + return backend.compile(built_c, compile_options=options) +jax._src.traceback_util.UnfilteredStackTrace: jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: XLA:TPU compile permanent error. Ran out of memory in memory space hbm. Used 15.59G of 15.48G hbm. Exceeded hbm capacity by 108.18M. +Total hbm usage >= 16.11G: + reserved 530.00M + program 8.36G + arguments 7.23G +Output size 7.17G; shares 7.17G with arguments. +Program hbm requirement 8.36G: + global 260.0K + scoped 69.20M + HLO temp 7.31G (98.6% utilization: Unpadded (6.97G) Padded (7.07G), 3.3% fragmentation (246.53M)) + overlays 1003.68M + Largest program allocations in hbm: + 1. Size: 1003.68M + XLA label: overlays + Allocation type: overlays + ========================== + 2. 
Size: 750.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/43/remat(core_fn)/43/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: bf16[24,16,999,999]{2,3,1,0:T(8,128)(2,1)} + Unpadded size: 730.96M + Extra memory due to padding: 19.04M (1.0x expansion) + XLA label: fusion.23118 = fusion(fusion.15347, bitcast.675, bitcast.677), kind=kOutput, calls=fused_computation.21200 + Allocation type: HLO temp + ========================== + 3. Size: 750.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/43/remat(core_fn)/43/attention/jit(_einsum)/dot_general[dimension_numbers=(((2,), (3,)), ((0, 1), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/wav2vec2-1b-npsc-nst/models/modeling_flax_wav2vec2.py" source_line=387 + Shape: bf16[24,16,999,999]{2,3,1,0:T(8,128)(2,1)} + Unpadded size: 730.96M + Extra memory due to padding: 19.04M (1.0x expansion) + XLA label: fusion.372 = fusion(get-tuple-element.11691, get-tuple-element.11690, fusion.13842, bitcast.685, ...(+1)), kind=kOutput, calls=fused_computation.371 + Allocation type: HLO temp + ========================== + 4. 
Size: 750.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/43/remat(core_fn)/43/attention/mul" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=107 + Shape: bf16[24,16,999,999]{2,3,1,0:T(8,128)(2,1)} + Unpadded size: 730.96M + Extra memory due to padding: 19.04M (1.0x expansion) + XLA label: fusion.371 = fusion(get-tuple-element.11695, get-tuple-element.11691, get-tuple-element.11690, negate.140, ...(+1)), kind=kLoop, calls=fused_computation.370 + Allocation type: HLO temp + ========================== + 5. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/dropout/add" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/stochastic.py" source_line=69 + Shape: u32[12,999,1280]{2,0,1:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 19.51M (1.3x expansion) + XLA label: fusion.6015 = fusion(xor.5514, bitcast.32, fusion.5702, bitcast.31, ...(+3)), kind=kLoop, calls=fused_computation.5858 + Allocation type: HLO temp + ========================== + 6. Size: 67.50M + Shape: bf16[24,1127,16,80]{1,3,2,0:T(8,128)(2,1)} + Unpadded size: 66.04M + Extra memory due to padding: 1.46M (1.0x expansion) + XLA label: copy.7694.remat_compressed = copy(copy.7694) + Allocation type: HLO temp + ========================== + 7. Size: 60.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/43/remat(core_fn)/43/attention/k_proj/add" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=200 + Shape: bf16[24,999,1280]{1,2,0:T(8,128)(2,1)} + Unpadded size: 58.54M + Extra memory due to padding: 1.46M (1.0x expansion) + XLA label: copy.8152.remat = copy(fusion.7393) + Allocation type: HLO temp + ========================== + 8. 
Size: 60.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/43/remat(core_fn)/43/attention/div" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=93 + Shape: bf16[24,999,1280]{1,2,0:T(8,128)(2,1)} + Unpadded size: 58.54M + Extra memory due to padding: 1.46M (1.0x expansion) + XLA label: copy.8151.remat = copy(fusion.7395) + Allocation type: HLO temp + ========================== + 9. Size: 58.59M + Shape: u32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.5948.remat_compressed = copy(fusion.5948) + Allocation type: HLO temp + ========================== + 10. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/25/25/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.4030 = fusion(get-tuple-element.11572, convert.3623, convert.3624, fusion.3050), kind=kOutput, calls=fused_computation.3876 + Allocation type: HLO temp + ========================== + 11. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/layer_norm/reduce_sum[axes=(2,)]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.4186 = fusion(fusion.6213, copy.9974, convert.4093, fusion.5973, ...(+1)), kind=kLoop, calls=fused_computation.4032 + Allocation type: HLO temp + ========================== + 12. 
Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/41/41/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.3934 = fusion(get-tuple-element.11636, convert.3911, convert.3912, fusion.3082), kind=kOutput, calls=fused_computation.3780 + Allocation type: HLO temp + ========================== + 13. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/40/40/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.3940 = fusion(get-tuple-element.11632, convert.3895, convert.3896, fusion.3080), kind=kOutput, calls=fused_computation.3786 + Allocation type: HLO temp + ========================== + 14. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/9/9/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.4126 = fusion(get-tuple-element.11508, convert.4087, convert.4088, fusion.3018), kind=kOutput, calls=fused_computation.3972 + Allocation type: HLO temp + ========================== + 15. 
Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/39/39/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.3946 = fusion(get-tuple-element.11628, convert.3863, convert.3864, fusion.3078), kind=kOutput, calls=fused_computation.3792 + Allocation type: HLO temp + ========================== + 16. Size: 58.54M + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: copy.9974 = copy(bitcast.9495) + Allocation type: HLO temp + ========================== + 17. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/23/23/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.4042 = fusion(get-tuple-element.11564, convert.3591, convert.3592, fusion.3046), kind=kOutput, calls=fused_computation.3888 + Allocation type: HLO temp + ========================== + 18. 
Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/7/7/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.4138 = fusion(get-tuple-element.11500, convert.4055, convert.4056, fusion.3014), kind=kOutput, calls=fused_computation.3984 + Allocation type: HLO temp + ========================== + 19. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/42/42/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.3928 = fusion(get-tuple-element.11640, convert.3927, convert.3928, fusion.3084), kind=kOutput, calls=fused_computation.3774 + Allocation type: HLO temp + ========================== + 20. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/10/10/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.4120 = fusion(get-tuple-element.11512, convert.3367, convert.3368, fusion.3020), kind=kOutput, calls=fused_computation.3966 + Allocation type: HLO temp + ========================== +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. 
+-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1605, in + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) +jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: XLA:TPU compile permanent error. Ran out of memory in memory space hbm. Used 15.59G of 15.48G hbm. Exceeded hbm capacity by 108.18M. +Total hbm usage >= 16.11G: + reserved 530.00M + program 8.36G + arguments 7.23G +Output size 7.17G; shares 7.17G with arguments. +Program hbm requirement 8.36G: + global 260.0K + scoped 69.20M + HLO temp 7.31G (98.6% utilization: Unpadded (6.97G) Padded (7.07G), 3.3% fragmentation (246.53M)) + overlays 1003.68M + Largest program allocations in hbm: + 1. Size: 1003.68M + XLA label: overlays + Allocation type: overlays + ========================== + 2. Size: 750.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/43/remat(core_fn)/43/attention/jit(_einsum)/dot_general[dimension_numbers=(((3,), (3,)), ((0, 2), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=95 + Shape: bf16[24,16,999,999]{2,3,1,0:T(8,128)(2,1)} + Unpadded size: 730.96M + Extra memory due to padding: 19.04M (1.0x expansion) + XLA label: fusion.23118 = fusion(fusion.15347, bitcast.675, bitcast.677), kind=kOutput, calls=fused_computation.21200 + Allocation type: HLO temp + ========================== + 3. 
Size: 750.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/43/remat(core_fn)/43/attention/jit(_einsum)/dot_general[dimension_numbers=(((2,), (3,)), ((0, 1), (0, 2))) precision=(, ) preferred_element_type=None]" source_file="/data/wav2vec2-1b-npsc-nst/models/modeling_flax_wav2vec2.py" source_line=387 + Shape: bf16[24,16,999,999]{2,3,1,0:T(8,128)(2,1)} + Unpadded size: 730.96M + Extra memory due to padding: 19.04M (1.0x expansion) + XLA label: fusion.372 = fusion(get-tuple-element.11691, get-tuple-element.11690, fusion.13842, bitcast.685, ...(+1)), kind=kOutput, calls=fused_computation.371 + Allocation type: HLO temp + ========================== + 4. Size: 750.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/43/remat(core_fn)/43/attention/mul" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=107 + Shape: bf16[24,16,999,999]{2,3,1,0:T(8,128)(2,1)} + Unpadded size: 730.96M + Extra memory due to padding: 19.04M (1.0x expansion) + XLA label: fusion.371 = fusion(get-tuple-element.11695, get-tuple-element.11691, get-tuple-element.11690, negate.140, ...(+1)), kind=kLoop, calls=fused_computation.370 + Allocation type: HLO temp + ========================== + 5. Size: 78.05M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/dropout/add" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/stochastic.py" source_line=69 + Shape: u32[12,999,1280]{2,0,1:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 19.51M (1.3x expansion) + XLA label: fusion.6015 = fusion(xor.5514, bitcast.32, fusion.5702, bitcast.31, ...(+3)), kind=kLoop, calls=fused_computation.5858 + Allocation type: HLO temp + ========================== + 6. 
Size: 67.50M + Shape: bf16[24,1127,16,80]{1,3,2,0:T(8,128)(2,1)} + Unpadded size: 66.04M + Extra memory due to padding: 1.46M (1.0x expansion) + XLA label: copy.7694.remat_compressed = copy(copy.7694) + Allocation type: HLO temp + ========================== + 7. Size: 60.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/43/remat(core_fn)/43/attention/k_proj/add" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=200 + Shape: bf16[24,999,1280]{1,2,0:T(8,128)(2,1)} + Unpadded size: 58.54M + Extra memory due to padding: 1.46M (1.0x expansion) + XLA label: copy.8152.remat = copy(fusion.7393) + Allocation type: HLO temp + ========================== + 8. Size: 60.00M + Operator: op_name="pmap(train_step)/jit(main)/transpose(jvp(FlaxWav2Vec2ForCTCModule))/wav2vec2/encoder/layers/43/remat(core_fn)/43/attention/div" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/attention.py" source_line=93 + Shape: bf16[24,999,1280]{1,2,0:T(8,128)(2,1)} + Unpadded size: 58.54M + Extra memory due to padding: 1.46M (1.0x expansion) + XLA label: copy.8151.remat = copy(fusion.7395) + Allocation type: HLO temp + ========================== + 9. Size: 58.59M + Shape: u32[12,999,1280]{2,1,0:T(8,128)} + Unpadded size: 58.54M + Extra memory due to padding: 60.0K (1.0x expansion) + XLA label: fusion.5948.remat_compressed = copy(fusion.5948) + Allocation type: HLO temp + ========================== + 10. 
Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/25/25/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.4030 = fusion(get-tuple-element.11572, convert.3623, convert.3624, fusion.3050), kind=kOutput, calls=fused_computation.3876 + Allocation type: HLO temp + ========================== + 11. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/0/0/layer_norm/reduce_sum[axes=(2,)]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/normalization.py" source_line=82 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.4186 = fusion(fusion.6213, copy.9974, convert.4093, fusion.5973, ...(+1)), kind=kLoop, calls=fused_computation.4032 + Allocation type: HLO temp + ========================== + 12. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/41/41/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.3934 = fusion(get-tuple-element.11636, convert.3911, convert.3912, fusion.3082), kind=kOutput, calls=fused_computation.3780 + Allocation type: HLO temp + ========================== + 13. 
Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/40/40/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.3940 = fusion(get-tuple-element.11632, convert.3895, convert.3896, fusion.3080), kind=kOutput, calls=fused_computation.3786 + Allocation type: HLO temp + ========================== + 14. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/9/9/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.4126 = fusion(get-tuple-element.11508, convert.4087, convert.4088, fusion.3018), kind=kOutput, calls=fused_computation.3972 + Allocation type: HLO temp + ========================== + 15. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/39/39/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.3946 = fusion(get-tuple-element.11628, convert.3863, convert.3864, fusion.3078), kind=kOutput, calls=fused_computation.3792 + Allocation type: HLO temp + ========================== + 16. 
Size: 58.54M + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: copy.9974 = copy(bitcast.9495) + Allocation type: HLO temp + ========================== + 17. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/23/23/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.4042 = fusion(get-tuple-element.11564, convert.3591, convert.3592, fusion.3046), kind=kOutput, calls=fused_computation.3888 + Allocation type: HLO temp + ========================== + 18. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/7/7/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.4138 = fusion(get-tuple-element.11500, convert.4055, convert.4056, fusion.3014), kind=kOutput, calls=fused_computation.3984 + Allocation type: HLO temp + ========================== + 19. 
Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/42/42/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.3928 = fusion(get-tuple-element.11640, convert.3927, convert.3928, fusion.3084), kind=kOutput, calls=fused_computation.3774 + Allocation type: HLO temp + ========================== + 20. Size: 58.54M + Operator: op_name="pmap(train_step)/jit(main)/jvp(FlaxWav2Vec2ForCTCModule)/wav2vec2/encoder/layers/10/10/feed_forward/output_dense/dot_general[dimension_numbers=(((2,), (0,)), ((), ())) precision=(, ) preferred_element_type=None]" source_file="/data/flax/lib/python3.8/site-packages/flax/linen/linear.py" source_line=196 + Shape: bf16[24,999,1280]{2,0,1:T(8,128)(2,1)} + Unpadded size: 58.54M + XLA label: fusion.4120 = fusion(get-tuple-element.11512, convert.3367, convert.3368, fusion.3020), kind=kOutput, calls=fused_computation.3966 + Allocation type: HLO temp + ========================== \ No newline at end of file diff --git a/wandb/run-20220730_113845-2hglxdx5/files/requirements.txt b/wandb/run-20220730_113845-2hglxdx5/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ef78cbdea431c3d66bc5af51443394cb7955eab --- /dev/null +++ b/wandb/run-20220730_113845-2hglxdx5/files/requirements.txt @@ -0,0 +1,158 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 
+cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +exceptiongroup==1.0.0rc8 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +hypothesis==6.53.0 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +jiwer==2.3.0 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.4.0 +pygments==2.11.1 +pygtrie==2.5.0 +pyparsing==3.0.6 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 
+tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220730_113845-2hglxdx5/files/wandb-metadata.json b/wandb/run-20220730_113845-2hglxdx5/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..f621797030f9d0f561a2f1d05f7913e14e57cce2 --- /dev/null +++ b/wandb/run-20220730_113845-2hglxdx5/files/wandb-metadata.json @@ -0,0 +1,67 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-30T11:38:49.614640", + "startedAt": "2022-07-30T11:38:45.979578", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=./", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=24", + "--per_device_eval_batch_size=24", + "--gradient_accumulation_steps=1", + "--precision=full_mixed", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + 
"--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220730_113845-2hglxdx5/files/wandb-summary.json b/wandb/run-20220730_113845-2hglxdx5/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..0bcd2640a40d13eaefc952e887ce52089db5ac91 --- /dev/null +++ b/wandb/run-20220730_113845-2hglxdx5/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 1019}} \ No newline at end of file diff --git a/wandb/run-20220730_113845-2hglxdx5/logs/debug-internal.log b/wandb/run-20220730_113845-2hglxdx5/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..7e039a76dd1c062d1d83f1ab0533cb02cea5527f --- /dev/null +++ b/wandb/run-20220730_113845-2hglxdx5/logs/debug-internal.log @@ -0,0 +1,577 @@ +2022-07-30 11:38:46,907 INFO MainThread:3087125 [internal.py:wandb_internal():87] W&B internal server running at pid: 3087125, started at: 2022-07-30 11:38:46.906982 +2022-07-30 11:38:46,909 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: check_version +2022-07-30 11:38:46,909 INFO 
WriterThread:3087125 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/run-2hglxdx5.wandb +2022-07-30 11:38:46,910 DEBUG SenderThread:3087125 [sender.py:send():234] send: header +2022-07-30 11:38:46,910 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: check_version +2022-07-30 11:38:46,947 DEBUG SenderThread:3087125 [sender.py:send():234] send: run +2022-07-30 11:38:47,301 INFO SenderThread:3087125 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files +2022-07-30 11:38:47,301 INFO SenderThread:3087125 [sender.py:_start_run_threads():804] run started: 2hglxdx5 with start time 1659181126 +2022-07-30 11:38:47,301 DEBUG SenderThread:3087125 [sender.py:send():234] send: summary +2022-07-30 11:38:47,301 INFO SenderThread:3087125 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 11:38:47,302 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: run_start +2022-07-30 11:38:48,304 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/wandb-summary.json +2022-07-30 11:38:49,614 DEBUG HandlerThread:3087125 [meta.py:__init__():40] meta init +2022-07-30 11:38:49,614 DEBUG HandlerThread:3087125 [meta.py:__init__():54] meta init done +2022-07-30 11:38:49,614 DEBUG HandlerThread:3087125 [meta.py:probe():214] probe +2022-07-30 11:38:49,616 DEBUG HandlerThread:3087125 [meta.py:_setup_git():204] setup git +2022-07-30 11:38:49,653 DEBUG HandlerThread:3087125 [meta.py:_setup_git():211] setup git done +2022-07-30 11:38:49,653 DEBUG HandlerThread:3087125 [meta.py:_save_code():92] save code +2022-07-30 11:38:49,666 DEBUG HandlerThread:3087125 [meta.py:_save_code():113] save code done +2022-07-30 11:38:49,666 DEBUG HandlerThread:3087125 [meta.py:_save_patches():130] save patches +2022-07-30 
11:38:49,740 DEBUG HandlerThread:3087125 [meta.py:_save_patches():172] save patches done +2022-07-30 11:38:49,740 DEBUG HandlerThread:3087125 [meta.py:_save_pip():58] save pip +2022-07-30 11:38:49,741 DEBUG HandlerThread:3087125 [meta.py:_save_pip():72] save pip done +2022-07-30 11:38:49,741 DEBUG HandlerThread:3087125 [meta.py:probe():252] probe done +2022-07-30 11:38:49,744 DEBUG SenderThread:3087125 [sender.py:send():234] send: files +2022-07-30 11:38:49,744 INFO SenderThread:3087125 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-30 11:38:49,744 INFO SenderThread:3087125 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-30 11:38:49,745 INFO SenderThread:3087125 [sender.py:_save_file():939] saving file diff.patch with policy now +2022-07-30 11:38:49,750 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:38:49,750 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:38:50,213 INFO Thread-11 :3087125 [upload_job.py:push():137] Uploaded file /tmp/tmpqzgmy2k7wandb/fzrkrmxo-wandb-metadata.json +2022-07-30 11:38:50,278 INFO Thread-13 :3087125 [upload_job.py:push():137] Uploaded file /tmp/tmpqzgmy2k7wandb/2c98i3yk-diff.patch +2022-07-30 11:38:50,305 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/wandb-metadata.json +2022-07-30 11:38:50,305 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/code/run_flax_speech_recognition_ctc.py +2022-07-30 11:38:50,305 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:38:50,305 INFO Thread-8 :3087125 
[dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/requirements.txt +2022-07-30 11:38:50,305 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/diff.patch +2022-07-30 11:38:50,306 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/code +2022-07-30 11:38:50,435 INFO Thread-12 :3087125 [upload_job.py:push():137] Uploaded file /tmp/tmpqzgmy2k7wandb/11tmoa1l-code/run_flax_speech_recognition_ctc.py +2022-07-30 11:38:52,306 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:38:54,306 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:38:56,308 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:38:58,309 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:39:04,312 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:39:04,903 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:39:04,903 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:39:06,313 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:39:17,697 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:39:20,035 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:39:20,035 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:39:20,319 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:39:22,320 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:39:33,326 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:39:35,185 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:39:35,185 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:39:35,327 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:39:37,328 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:39:47,773 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:39:49,333 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:39:50,318 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:39:50,319 DEBUG SenderThread:3087125 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 11:39:51,334 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:05,481 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:40:05,481 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:40:17,851 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:40:20,655 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:40:20,655 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:40:32,352 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:35,353 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:35,834 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:40:35,834 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:40:37,354 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:39,355 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:41,356 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:43,357 INFO Thread-8 :3087125 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:45,357 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:47,358 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:47,918 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:40:49,359 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:51,011 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:40:51,011 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:40:51,360 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:53,361 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:55,362 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:57,363 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:40:59,364 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:01,365 INFO Thread-8 :3087125 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:03,366 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:05,367 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:06,165 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:41:06,165 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:41:07,368 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:09,368 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:11,369 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:13,370 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:15,372 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:17,373 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:18,003 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:41:19,373 INFO Thread-8 :3087125 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:21,320 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:41:21,320 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:41:21,374 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:23,375 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:25,376 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:27,377 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:29,378 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:31,379 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:33,380 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:35,381 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:36,463 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:41:36,463 
DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:41:37,382 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:39,383 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:41,384 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:43,385 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:45,386 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:47,387 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:48,082 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:41:49,389 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:51,390 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:51,606 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:41:51,606 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:41:53,391 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:55,392 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:57,393 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:41:59,394 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:01,395 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:03,402 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:05,397 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:06,746 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:42:06,746 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:42:07,398 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:09,400 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:11,400 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 
11:42:13,401 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:15,402 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:17,403 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:18,159 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:42:19,404 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:21,405 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:21,926 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:42:21,926 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:42:23,406 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:25,407 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:27,408 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:29,409 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 
11:42:31,410 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:33,411 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:35,412 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:37,069 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:42:37,070 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:42:37,413 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:39,414 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:41,415 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:43,416 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:45,417 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:47,417 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:48,248 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 
11:42:50,419 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:52,212 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:42:52,212 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:42:52,420 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:54,421 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:56,422 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:42:58,423 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:00,424 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:02,425 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:04,426 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:06,427 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:07,373 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 11:43:07,373 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:43:08,429 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:10,430 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:12,430 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:14,432 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:16,432 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:18,340 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:43:18,433 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:20,435 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:22,436 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:22,519 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:43:22,519 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:43:24,437 INFO Thread-8 :3087125 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:26,438 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:28,438 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:30,440 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:32,441 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:34,442 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:36,443 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:37,681 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:43:37,681 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:43:38,444 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:40,445 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:42,450 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:44,451 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:46,451 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:48,428 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:43:48,452 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:50,453 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:52,454 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:52,858 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:43:52,858 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:43:54,455 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:56,456 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:43:58,457 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:00,458 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:02,459 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:04,460 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:06,461 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:08,010 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:44:08,011 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:44:08,462 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:10,463 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:12,464 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:14,465 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:16,466 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:18,467 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 
11:44:18,507 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:44:20,468 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:22,469 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:23,154 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:44:23,155 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:44:24,470 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:26,472 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:28,472 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:30,474 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:32,474 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:34,475 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:36,479 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 
11:44:38,297 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:44:38,298 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:44:38,480 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:40,480 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:42,481 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:44,482 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:46,483 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:48,484 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:48,597 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:44:50,485 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:52,486 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:53,438 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:44:53,438 DEBUG SenderThread:3087125 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 11:44:54,487 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:56,488 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:44:58,489 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:00,490 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:02,491 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:04,492 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:06,493 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:08,494 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:08,579 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:45:08,579 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:45:10,495 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:12,496 INFO Thread-8 :3087125 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:14,497 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:16,498 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:18,499 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:18,676 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:45:20,500 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:22,501 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:23,730 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:45:23,730 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:45:24,502 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:26,503 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:28,504 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:30,506 INFO Thread-8 :3087125 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:32,506 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:34,507 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:36,508 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:38,509 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:38,869 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:45:38,869 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:45:40,510 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:42,511 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:44,512 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:46,513 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:48,516 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:48,757 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:45:50,517 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:52,518 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:54,005 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:45:54,006 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:45:54,519 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:56,521 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:45:58,521 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:00,522 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:02,523 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:04,524 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:06,525 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:09,152 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:46:09,190 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:46:18,828 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:46:24,330 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:46:24,330 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:46:39,508 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:46:39,508 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:46:43,543 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:45,544 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:47,551 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:48,896 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:46:49,551 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:51,553 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:53,553 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:54,766 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:46:54,766 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:46:55,554 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:57,561 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:46:59,562 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:01,563 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:03,564 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:05,564 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:07,565 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:09,566 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:09,928 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:47:09,928 DEBUG SenderThread:3087125 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 11:47:11,567 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:13,568 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:15,569 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:17,570 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:18,970 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:47:19,571 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:21,574 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:23,575 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:47:25,072 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:47:25,072 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:47:40,207 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:47:40,208 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:47:49,048 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:47:55,679 DEBUG 
HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:47:55,679 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:48:04,593 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:06,593 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:08,594 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:10,595 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:10,936 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:48:10,936 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:48:12,596 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:14,597 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:16,598 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:18,599 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:19,123 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats 
+2022-07-30 11:48:20,601 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:22,602 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:24,603 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:26,091 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:48:26,091 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:48:26,604 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:28,605 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:30,606 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:32,607 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:34,608 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:38,610 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:40,611 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:41,259 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:48:41,259 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:48:42,613 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:44,613 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:46,614 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:48,615 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:49,207 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:48:50,616 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:52,617 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:54,618 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:56,410 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:48:56,410 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:48:56,619 INFO Thread-8 :3087125 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:48:58,620 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:00,621 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:02,622 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:04,623 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:06,624 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:08,627 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:10,628 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:11,555 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:49:11,555 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:49:12,629 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:14,631 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:16,631 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:18,632 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:19,279 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:49:20,633 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:22,634 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:24,635 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:26,637 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:49:26,749 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:49:26,750 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:49:41,888 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:49:41,889 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:49:49,351 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:49:57,024 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:49:57,024 DEBUG SenderThread:3087125 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 11:50:11,655 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:50:12,314 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:50:12,314 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:50:19,422 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:50:22,660 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:50:27,572 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:50:27,572 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:50:28,663 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:50:36,667 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:50:38,668 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:50:42,895 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:50:42,895 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:50:44,670 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:50:49,497 DEBUG SenderThread:3087125 
[sender.py:send():234] send: stats +2022-07-30 11:50:55,675 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:50:57,676 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:50:58,125 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:50:58,125 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:51:01,679 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:51:03,680 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:51:05,681 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:51:13,638 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:51:13,638 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:51:13,684 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:51:19,571 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:51:28,990 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:51:28,990 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:51:44,191 DEBUG HandlerThread:3087125 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:51:44,192 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:51:49,645 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:51:57,703 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:51:59,375 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:51:59,375 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:51:59,704 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:52:05,707 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:52:07,708 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:52:14,540 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:52:14,541 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:52:19,717 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:52:29,807 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:52:29,808 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:52:45,798 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:52:45,798 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status 
+2022-07-30 11:52:49,791 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:53:00,937 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:53:00,937 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:53:16,078 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:53:16,078 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:53:19,866 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:53:31,211 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:53:31,211 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:53:46,345 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:53:46,345 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:53:49,945 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:54:01,479 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:54:01,480 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:54:16,617 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:54:16,618 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:54:20,021 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:54:31,751 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:54:31,752 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:54:46,896 DEBUG HandlerThread:3087125 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:54:46,897 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:54:50,094 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:55:02,034 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:55:02,035 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:55:17,175 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:55:17,175 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:55:20,168 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:55:32,318 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:55:32,318 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:55:45,805 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:55:46,764 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:46,764 DEBUG SenderThread:3087125 [sender.py:send():234] send: telemetry +2022-07-30 11:55:46,765 DEBUG SenderThread:3087125 [sender.py:send():234] send: exit +2022-07-30 11:55:46,765 INFO SenderThread:3087125 [sender.py:send_exit():366] handling exit code: 1 +2022-07-30 11:55:46,766 INFO SenderThread:3087125 [sender.py:send_exit():368] handling runtime: 1019 +2022-07-30 11:55:46,766 INFO SenderThread:3087125 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 11:55:46,766 INFO SenderThread:3087125 [sender.py:send_exit():374] send defer +2022-07-30 11:55:46,766 DEBUG SenderThread:3087125 [sender.py:send_request():248] 
send_request: poll_exit +2022-07-30 11:55:46,767 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: defer +2022-07-30 11:55:46,767 INFO HandlerThread:3087125 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-30 11:55:46,767 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: defer +2022-07-30 11:55:46,767 INFO SenderThread:3087125 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-30 11:55:46,767 INFO SenderThread:3087125 [sender.py:transition_state():387] send defer: 1 +2022-07-30 11:55:46,767 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: defer +2022-07-30 11:55:46,767 INFO HandlerThread:3087125 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-30 11:55:46,806 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:55:46,806 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/wandb-summary.json +2022-07-30 11:55:46,834 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: defer +2022-07-30 11:55:46,834 INFO SenderThread:3087125 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-30 11:55:46,834 INFO SenderThread:3087125 [sender.py:transition_state():387] send defer: 2 +2022-07-30 11:55:46,834 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: defer +2022-07-30 11:55:46,835 DEBUG SenderThread:3087125 [sender.py:send():234] send: stats +2022-07-30 11:55:46,835 INFO HandlerThread:3087125 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-30 11:55:46,835 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: defer +2022-07-30 11:55:46,835 INFO SenderThread:3087125 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-30 
11:55:46,835 INFO SenderThread:3087125 [sender.py:transition_state():387] send defer: 3 +2022-07-30 11:55:46,836 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: defer +2022-07-30 11:55:46,836 INFO HandlerThread:3087125 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-30 11:55:46,836 DEBUG SenderThread:3087125 [sender.py:send():234] send: summary +2022-07-30 11:55:46,836 INFO SenderThread:3087125 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 11:55:46,836 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: defer +2022-07-30 11:55:46,836 INFO SenderThread:3087125 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-30 11:55:46,836 INFO SenderThread:3087125 [sender.py:transition_state():387] send defer: 4 +2022-07-30 11:55:46,836 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: defer +2022-07-30 11:55:46,836 INFO HandlerThread:3087125 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-30 11:55:46,837 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: defer +2022-07-30 11:55:46,837 INFO SenderThread:3087125 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-30 11:55:46,869 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:46,995 INFO SenderThread:3087125 [sender.py:transition_state():387] send defer: 5 +2022-07-30 11:55:46,995 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 11:55:46,995 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: defer +2022-07-30 11:55:46,995 INFO HandlerThread:3087125 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-30 11:55:46,996 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: defer +2022-07-30 11:55:46,996 INFO SenderThread:3087125 [sender.py:send_request_defer():383] 
handle sender defer: 5 +2022-07-30 11:55:46,996 INFO SenderThread:3087125 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-30 11:55:47,096 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:47,806 INFO Thread-8 :3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/config.yaml +2022-07-30 11:55:47,806 INFO SenderThread:3087125 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/wandb-summary.json +2022-07-30 11:55:47,807 INFO SenderThread:3087125 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files +2022-07-30 11:55:47,807 INFO SenderThread:3087125 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/config.yaml config.yaml +2022-07-30 11:55:47,807 INFO SenderThread:3087125 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/diff.patch diff.patch +2022-07-30 11:55:47,807 INFO SenderThread:3087125 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/requirements.txt requirements.txt +2022-07-30 11:55:47,808 INFO SenderThread:3087125 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log output.log +2022-07-30 11:55:47,810 INFO SenderThread:3087125 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/wandb-summary.json wandb-summary.json +2022-07-30 11:55:47,811 INFO SenderThread:3087125 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/wandb-metadata.json wandb-metadata.json +2022-07-30 11:55:47,814 INFO SenderThread:3087125 
[dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-30 11:55:47,814 INFO SenderThread:3087125 [sender.py:transition_state():387] send defer: 6 +2022-07-30 11:55:47,814 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 11:55:47,817 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: defer +2022-07-30 11:55:47,817 INFO HandlerThread:3087125 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-30 11:55:47,820 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: defer +2022-07-30 11:55:47,820 INFO SenderThread:3087125 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-30 11:55:47,820 INFO SenderThread:3087125 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 11:55:47,918 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:47,918 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 11:55:48,020 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:48,020 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 11:55:48,121 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:48,121 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 11:55:48,223 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:48,223 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 11:55:48,284 INFO Thread-14 :3087125 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/config.yaml +2022-07-30 
11:55:48,286 INFO Thread-17 :3087125 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/wandb-summary.json +2022-07-30 11:55:48,324 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:48,325 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 11:55:48,348 INFO Thread-15 :3087125 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/requirements.txt +2022-07-30 11:55:48,426 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:48,426 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 11:55:48,496 INFO Thread-16 :3087125 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/files/output.log +2022-07-30 11:55:48,528 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:48,528 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 11:55:48,630 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:48,630 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 11:55:48,696 INFO Thread-7 :3087125 [sender.py:transition_state():387] send defer: 7 +2022-07-30 11:55:48,696 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: defer +2022-07-30 11:55:48,697 INFO HandlerThread:3087125 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-30 11:55:48,697 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: defer +2022-07-30 11:55:48,697 INFO SenderThread:3087125 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-30 11:55:48,731 DEBUG HandlerThread:3087125 
[handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:49,112 INFO SenderThread:3087125 [sender.py:transition_state():387] send defer: 8 +2022-07-30 11:55:49,112 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 11:55:49,112 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: defer +2022-07-30 11:55:49,112 INFO HandlerThread:3087125 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-30 11:55:49,113 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: defer +2022-07-30 11:55:49,113 INFO SenderThread:3087125 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-30 11:55:49,113 INFO SenderThread:3087125 [sender.py:transition_state():387] send defer: 9 +2022-07-30 11:55:49,113 DEBUG SenderThread:3087125 [sender.py:send():234] send: final +2022-07-30 11:55:49,113 DEBUG SenderThread:3087125 [sender.py:send():234] send: footer +2022-07-30 11:55:49,113 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: defer +2022-07-30 11:55:49,113 INFO HandlerThread:3087125 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-30 11:55:49,114 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: defer +2022-07-30 11:55:49,114 INFO SenderThread:3087125 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-30 11:55:49,213 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 11:55:49,213 DEBUG SenderThread:3087125 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 11:55:49,214 INFO SenderThread:3087125 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 11:55:49,472 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: get_summary +2022-07-30 11:55:49,473 DEBUG HandlerThread:3087125 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-30 11:55:49,474 DEBUG 
HandlerThread:3087125 [handler.py:handle_request():130] handle_request: shutdown +2022-07-30 11:55:49,474 INFO HandlerThread:3087125 [handler.py:finish():731] shutting down handler +2022-07-30 11:55:50,114 INFO WriterThread:3087125 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/run-2hglxdx5.wandb +2022-07-30 11:55:50,471 INFO SenderThread:3087125 [sender.py:finish():1070] shutting down sender +2022-07-30 11:55:50,471 INFO SenderThread:3087125 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 11:55:50,471 INFO SenderThread:3087125 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 11:55:50,474 INFO MainThread:3087125 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220730_113845-2hglxdx5/logs/debug.log b/wandb/run-20220730_113845-2hglxdx5/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..5192a2616aed5fe7cb6304557316b8289bf41b74 --- /dev/null +++ b/wandb/run-20220730_113845-2hglxdx5/logs/debug.log @@ -0,0 +1,148 @@ +2022-07-30 11:38:45,981 INFO MainThread:3085831 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-30 11:38:45,981 INFO MainThread:3085831 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-30 11:38:45,981 INFO MainThread:3085831 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/logs/debug.log +2022-07-30 11:38:45,981 INFO MainThread:3085831 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_113845-2hglxdx5/logs/debug-internal.log +2022-07-30 11:38:45,981 INFO MainThread:3085831 [wandb_init.py:init():404] calling init triggers +2022-07-30 11:38:45,981 INFO MainThread:3085831 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-30 11:38:45,981 INFO MainThread:3085831 [wandb_init.py:init():460] starting 
backend +2022-07-30 11:38:45,981 INFO MainThread:3085831 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-30 11:38:46,030 INFO MainThread:3085831 [backend.py:ensure_launched():216] starting backend process... +2022-07-30 11:38:46,076 INFO MainThread:3085831 [backend.py:ensure_launched():221] started backend process with pid: 3087125 +2022-07-30 11:38:46,078 INFO MainThread:3085831 [wandb_init.py:init():469] backend started and connected +2022-07-30 11:38:46,092 INFO MainThread:3085831 [wandb_init.py:init():533] updated telemetry +2022-07-30 11:38:46,206 INFO MainThread:3085831 [wandb_init.py:init():563] communicating current version +2022-07-30 11:38:46,946 INFO MainThread:3085831 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-30 11:38:46,946 INFO MainThread:3085831 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-30 11:38:47,302 INFO MainThread:3085831 [wandb_init.py:init():606] starting run threads in backend +2022-07-30 11:38:49,748 INFO MainThread:3085831 [wandb_run.py:_console_start():1810] atexit reg +2022-07-30 11:38:49,749 INFO MainThread:3085831 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-30 11:38:49,749 INFO MainThread:3085831 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-30 11:38:49,751 INFO MainThread:3085831 [wandb_run.py:_redirect():1745] Redirects installed. 
+2022-07-30 11:38:49,752 INFO MainThread:3085831 [wandb_init.py:init():633] run started, returning control to user process +2022-07-30 11:55:44,218 INFO MainThread:3085831 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-30 11:55:44,224 INFO MainThread:3085831 [wandb_run.py:_restore():1752] restore +2022-07-30 11:55:46,767 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 74351 +} + +2022-07-30 11:55:46,995 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 74351 +} + +2022-07-30 11:55:47,817 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 419641 +} + +2022-07-30 11:55:47,919 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 419641 +} + +2022-07-30 11:55:48,020 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 419641 + total_bytes: 419641 +} + +2022-07-30 11:55:48,122 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 419641 + total_bytes: 419641 +} + +2022-07-30 11:55:48,223 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 419641 + total_bytes: 419641 +} + +2022-07-30 11:55:48,325 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + 
other_count: 1 +} +pusher_stats { + uploaded_bytes: 419641 + total_bytes: 419641 +} + +2022-07-30 11:55:48,427 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 419641 + total_bytes: 419641 +} + +2022-07-30 11:55:48,529 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 419641 + total_bytes: 419641 +} + +2022-07-30 11:55:48,630 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 419641 + total_bytes: 419641 +} + +2022-07-30 11:55:49,112 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 419641 + total_bytes: 419641 +} + +2022-07-30 11:55:49,472 INFO MainThread:3085831 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 419641 + total_bytes: 419641 +} +local_info { +} + +2022-07-30 11:55:51,044 INFO MainThread:3085831 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220730_113845-2hglxdx5/run-2hglxdx5.wandb b/wandb/run-20220730_113845-2hglxdx5/run-2hglxdx5.wandb new file mode 100644 index 0000000000000000000000000000000000000000..b37dd4ad70285d172c690390b73198d978868ea0 --- /dev/null +++ b/wandb/run-20220730_113845-2hglxdx5/run-2hglxdx5.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d05d045fc74b77c5283c71e9b35293393394b98b30c2b916294e2cb576751a00 +size 472289 diff --git a/wandb/run-20220730_115718-1xckv47v/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220730_115718-1xckv47v/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 
index 0000000000000000000000000000000000000000..d846a57f2375127bc169f9735151ad564083efac --- /dev/null +++ b/wandb/run-20220730_115718-1xckv47v/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1605 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220730_115718-1xckv47v/files/config.yaml b/wandb/run-20220730_115718-1xckv47v/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af2d4af0ae6d32909b3c95e30f0f5be508b0001c --- /dev/null +++ b/wandb/run-20220730_115718-1xckv47v/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659182238 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220730_115718-1xckv47v/files/diff.patch b/wandb/run-20220730_115718-1xckv47v/files/diff.patch new file mode 100644 index 0000000000000000000000000000000000000000..39b46e1762f7e4b4f330f4ff487b597cc5496642 --- /dev/null +++ b/wandb/run-20220730_115718-1xckv47v/files/diff.patch @@ -0,0 +1,10 @@ +diff --git a/.gitattributes b/.gitattributes +index 755356a..f0eef4b 100644 +--- a/.gitattributes ++++ b/.gitattributes +@@ -29,3 +29,4 @@ 
saved_model/**/* filter=lfs diff=lfs merge=lfs -text + *.zip filter=lfs diff=lfs merge=lfs -text + *.zstandard filter=lfs diff=lfs merge=lfs -text + *tfevents* filter=lfs diff=lfs merge=lfs -text ++*.wandb filter=lfs diff=lfs merge=lfs -text +\ No newline at end of file diff --git a/wandb/run-20220730_115718-1xckv47v/files/output.log b/wandb/run-20220730_115718-1xckv47v/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..0902f5d44f1c27bf252d41f55c7fbfe0735856ee --- /dev/null +++ b/wandb/run-20220730_115718-1xckv47v/files/output.log @@ -0,0 +1,1181 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul30_11-57-13_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, 
+logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=20, +per_device_train_batch_size=20, +precision=full_mixed, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 73.76it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 
380.57it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + 
"contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at 
/home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_q', 'bias'), ('project_hid', 'bias'), ('quantizer', 'codevectors'), ('project_q', 'kernel'), ('quantizer', 'weight_proj', 'bias'), ('project_hid', 'kernel'), ('quantizer', 'weight_proj', 'kernel')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
+Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'kernel'), ('lm_head', 'bias')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 9040.69ex/s] +removing punctuation from train split #1: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8753.07ex/s] +removing punctuation from train split #3: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8857.64ex/s] +removing punctuation from train split #2: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8329.13ex/s] +removing punctuation from train split #5: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 9201.53ex/s] +removing punctuation 
from train split #6: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8829.06ex/s] +removing punctuation from train split #4: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7897.68ex/s] +removing punctuation from train split #7: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8353.78ex/s] +removing punctuation from train split #8: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8645.86ex/s] +removing punctuation from train split #4: 89%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8519/9523 [00:01<00:00, 8100.65ex/s] +removing punctuation from train split #4: 98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9339/9523 [00:01<00:00, 8040.69ex/s] +removing punctuation from train split #6: 96%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9123/9523 [00:01<00:00, 8476.80ex/s] +removing punctuation from train split #7: 
92%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8794/9523 [00:01<00:00, 8218.43ex/s] +removing punctuation from train split #8: 65%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 6218/9523 [00:00<00:00, 8866.88ex/s] +removing punctuation from train split #9: 53%|████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 5059/9523 [00:00<00:00, 7901.23ex/s] +removing punctuation from train split #8: 75%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 7105/9523 [00:00<00:00, 8853.02ex/s] +removing punctuation from train split #8: 93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8873/9523 [00:01<00:00, 8367.68ex/s] +removing punctuation from train split #9: 80%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 7652/9523 [00:01<00:00, 6570.09ex/s] +removing punctuation from train split #9: 88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8368/9523 [00:01<00:00, 6605.89ex/s] +removing punctuation from train split #10: 92%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8766/9523 [00:01<00:00, 8388.55ex/s] +removing punctuation from train split #11: 
91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8647/9523 [00:01<00:00, 8288.42ex/s] +removing punctuation from train split #12: 95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 9060/9522 [00:01<00:00, 9381.60ex/s] +removing punctuation from train split #13: 81%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 7740/9522 [00:00<00:00, 7945.57ex/s] +removing punctuation from train split #14: 72%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 6881/9522 [00:00<00:00, 8757.71ex/s] +removing punctuation from train split #15: 72%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 6822/9522 [00:00<00:00, 8696.29ex/s] +removing punctuation from train split #16: 73%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 6915/9522 [00:00<00:00, 8780.45ex/s] +removing punctuation from train split #17: 53%|███████████████████████████████████████████████████████████████████████████████████████████████████████ | 5005/9522 [00:00<00:00, 8535.15ex/s] +removing punctuation from train split #18: 54%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 5140/9522 [00:00<00:00, 8692.31ex/s] +removing punctuation from train split #19: 44%|██████████████████████████████████████████████████████████████████████████████████████ | 4183/9522 [00:00<00:00, 8514.79ex/s] 
+removing punctuation from train split #20: 35%|████████████████████████████████████████████████████████████████████▊ | 3342/9522 [00:00<00:00, 8442.54ex/s] +removing punctuation from train split #21: 35%|████████████████████████████████████████████████████████████████████ | 3308/9522 [00:00<00:00, 8375.72ex/s] +removing punctuation from train split #22: 26%|███████████████████████████████████████████████████▉ | 2521/9522 [00:00<00:00, 8489.99ex/s] +removing punctuation from train split #23: 26%|███████████████████████████████████████████████████▋ | 2514/9522 [00:00<00:00, 8496.01ex/s] +removing punctuation from train split #24: 11%|█████████████████████▊ | 1062/9522 [00:00<00:01, 5187.20ex/s] +removing punctuation from train split #25: 17%|██████████████████████████████████▏ | 1658/9522 [00:00<00:00, 8350.20ex/s] +removing punctuation from train split #26: 7%|█████████████▎ | 641/9522 [00:00<00:01, 6401.32ex/s] +removing punctuation from train split #27: 0%| | 0/9522 [00:00 to the vocabulary +Adding to the vocabulary +2022-07-30 12:09:22.954433: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2022-07-30 12:09:22.954469: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303) +/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +WARNING:huggingface_hub.repository:/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +run_flax_speech_recognition_ctc.py:337: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. 
+ return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) +INFO:__main__:***** Running training ***** +INFO:__main__: Num examples = 302693 +INFO:__main__: Num Epochs = 40 +INFO:__main__: Instantaneous batch size per device = 20 +INFO:__main__: Num gradient accumulation steps = 1 +INFO:__main__: Total train batch size (w. parallel & distributed) = 160 +INFO:__main__: Total optimization steps = 75640 +INFO:__main__: Gradient checkpointing: True +INFO:__main__: Use scan: False +INFO:__main__: Fuse matmuls: False +Epoch ... (1/40): 0%| | 0/40 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) + File "/data/flax/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 162, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2158, in cache_miss + out_tree, out_flat = f_pmapped_(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2034, in pmap_f + out = pxla.xla_pmap( + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2022, in bind + return map_bind(self, fun, *args, **params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2054, in map_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2025, in process + return trace.process_map(self, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 687, in process_call + return primitive.impl(f, *tracers, **params) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 841, in xla_pmap_impl + return compiled_fun(*args) + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1656, in 
__call__ + out_bufs = self.xla_executable.execute_sharded_on_local_devices(input_bufs) +jax._src.traceback_util.UnfilteredStackTrace: jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: Attempting to reserve 6.77G at the bottom of memory. That was not possible. There are 8.21G free, 0B reserved, and 6.48G reservable. If fragmentation is eliminated, the maximum reservable bytes would be 8.21G, so compaction will enable this reservation. The nearest obstacle is at 6.48G from the bottom with size 16.0K.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1605, in + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) +jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: Attempting to reserve 6.77G at the bottom of memory. That was not possible. There are 8.21G free, 0B reserved, and 6.48G reservable. If fragmentation is eliminated, the maximum reservable bytes would be 8.21G, so compaction will enable this reservation. The nearest obstacle is at 6.48G from the bottom with size 16.0K.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). 
\ No newline at end of file diff --git a/wandb/run-20220730_115718-1xckv47v/files/requirements.txt b/wandb/run-20220730_115718-1xckv47v/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ef78cbdea431c3d66bc5af51443394cb7955eab --- /dev/null +++ b/wandb/run-20220730_115718-1xckv47v/files/requirements.txt @@ -0,0 +1,158 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +exceptiongroup==1.0.0rc8 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +hypothesis==6.53.0 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +jiwer==2.3.0 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 
+pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.4.0 +pygments==2.11.1 +pygtrie==2.5.0 +pyparsing==3.0.6 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220730_115718-1xckv47v/files/wandb-metadata.json b/wandb/run-20220730_115718-1xckv47v/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..4bcd35b5167cbbba5a54b4a189f2d1bc09a0c195 --- /dev/null +++ b/wandb/run-20220730_115718-1xckv47v/files/wandb-metadata.json @@ -0,0 +1,67 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-30T11:57:21.565402", + "startedAt": "2022-07-30T11:57:18.095485", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + 
"--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=./", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=20", + "--per_device_eval_batch_size=20", + "--gradient_accumulation_steps=1", + "--precision=full_mixed", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220730_115718-1xckv47v/files/wandb-summary.json b/wandb/run-20220730_115718-1xckv47v/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..a7ef6c1b2317e4698b2d4108e7c29c613ad653db --- /dev/null +++ 
b/wandb/run-20220730_115718-1xckv47v/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 1012}} \ No newline at end of file diff --git a/wandb/run-20220730_115718-1xckv47v/logs/debug-internal.log b/wandb/run-20220730_115718-1xckv47v/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..1bd716e1cbfc7bc75dc3a97b177ca49fffe11498 --- /dev/null +++ b/wandb/run-20220730_115718-1xckv47v/logs/debug-internal.log @@ -0,0 +1,576 @@ +2022-07-30 11:57:19,006 INFO MainThread:2100683 [internal.py:wandb_internal():87] W&B internal server running at pid: 2100683, started at: 2022-07-30 11:57:19.006054 +2022-07-30 11:57:19,008 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: check_version +2022-07-30 11:57:19,008 INFO WriterThread:2100683 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/run-1xckv47v.wandb +2022-07-30 11:57:19,009 DEBUG SenderThread:2100683 [sender.py:send():234] send: header +2022-07-30 11:57:19,009 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: check_version +2022-07-30 11:57:19,060 DEBUG SenderThread:2100683 [sender.py:send():234] send: run +2022-07-30 11:57:19,249 INFO SenderThread:2100683 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files +2022-07-30 11:57:19,249 INFO SenderThread:2100683 [sender.py:_start_run_threads():804] run started: 1xckv47v with start time 1659182238 +2022-07-30 11:57:19,249 DEBUG SenderThread:2100683 [sender.py:send():234] send: summary +2022-07-30 11:57:19,249 INFO SenderThread:2100683 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 11:57:19,249 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: run_start +2022-07-30 11:57:20,269 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_created():217] file/dir created: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/wandb-summary.json +2022-07-30 11:57:21,565 DEBUG HandlerThread:2100683 [meta.py:__init__():40] meta init +2022-07-30 11:57:21,565 DEBUG HandlerThread:2100683 [meta.py:__init__():54] meta init done +2022-07-30 11:57:21,565 DEBUG HandlerThread:2100683 [meta.py:probe():214] probe +2022-07-30 11:57:21,566 DEBUG HandlerThread:2100683 [meta.py:_setup_git():204] setup git +2022-07-30 11:57:21,604 DEBUG HandlerThread:2100683 [meta.py:_setup_git():211] setup git done +2022-07-30 11:57:21,604 DEBUG HandlerThread:2100683 [meta.py:_save_code():92] save code +2022-07-30 11:57:21,617 DEBUG HandlerThread:2100683 [meta.py:_save_code():113] save code done +2022-07-30 11:57:21,618 DEBUG HandlerThread:2100683 [meta.py:_save_patches():130] save patches +2022-07-30 11:57:21,695 DEBUG HandlerThread:2100683 [meta.py:_save_patches():172] save patches done +2022-07-30 11:57:21,695 DEBUG HandlerThread:2100683 [meta.py:_save_pip():58] save pip +2022-07-30 11:57:21,696 DEBUG HandlerThread:2100683 [meta.py:_save_pip():72] save pip done +2022-07-30 11:57:21,696 DEBUG HandlerThread:2100683 [meta.py:probe():252] probe done +2022-07-30 11:57:21,699 DEBUG SenderThread:2100683 [sender.py:send():234] send: files +2022-07-30 11:57:21,699 INFO SenderThread:2100683 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-30 11:57:21,699 INFO SenderThread:2100683 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-30 11:57:21,700 INFO SenderThread:2100683 [sender.py:_save_file():939] saving file diff.patch with policy now +2022-07-30 11:57:21,707 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:57:21,707 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:57:22,197 INFO Thread-11 :2100683 [upload_job.py:push():137] Uploaded file 
/tmp/tmpcy1kthhcwandb/3w11fl2a-wandb-metadata.json +2022-07-30 11:57:22,212 INFO Thread-13 :2100683 [upload_job.py:push():137] Uploaded file /tmp/tmpcy1kthhcwandb/3tkinvp2-diff.patch +2022-07-30 11:57:22,268 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/code/run_flax_speech_recognition_ctc.py +2022-07-30 11:57:22,269 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/wandb-metadata.json +2022-07-30 11:57:22,269 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/requirements.txt +2022-07-30 11:57:22,269 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:57:22,269 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/diff.patch +2022-07-30 11:57:22,269 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/code +2022-07-30 11:57:22,394 INFO Thread-12 :2100683 [upload_job.py:push():137] Uploaded file /tmp/tmpcy1kthhcwandb/3mlm83yr-code/run_flax_speech_recognition_ctc.py +2022-07-30 11:57:24,269 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:57:26,270 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:57:28,272 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:57:30,273 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:57:36,276 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:57:36,845 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:57:36,845 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:57:38,276 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:57:49,650 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 11:57:51,984 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:57:51,985 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:57:52,282 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:57:54,283 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:57:56,284 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:58:05,287 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:58:07,119 DEBUG HandlerThread:2100683 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:58:07,120 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:58:07,288 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:58:19,727 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 11:58:21,294 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:58:22,254 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:58:22,255 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:58:37,414 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:58:37,414 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:58:49,805 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 11:58:52,720 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:58:52,720 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:59:04,311 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:06,312 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:07,883 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:59:07,884 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status 
+2022-07-30 11:59:08,313 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:10,314 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:12,315 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:14,316 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:16,317 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:18,318 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:19,880 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 11:59:20,319 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:22,320 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:23,061 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:59:23,062 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:59:24,321 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log 
+2022-07-30 11:59:26,322 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:28,323 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:30,324 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:32,325 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:34,326 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:36,327 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:38,257 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:59:38,258 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:59:38,328 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:40,330 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:42,330 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:45,332 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:47,333 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:49,334 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:49,960 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 11:59:51,335 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:53,336 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:53,397 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 11:59:53,398 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 11:59:55,337 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:57,338 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 11:59:59,342 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:01,340 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:03,341 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:05,343 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:07,345 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:08,546 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:00:08,546 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:00:09,346 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:11,347 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:13,348 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:15,349 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:17,350 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:19,351 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:20,032 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:00:21,355 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:23,353 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:23,746 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:00:23,746 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:00:25,354 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:27,356 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:29,357 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:31,358 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:33,359 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:35,360 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:37,362 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:38,888 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:00:38,889 DEBUG SenderThread:2100683 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 12:00:39,363 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:41,364 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:43,365 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:45,366 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:47,367 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:49,368 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:50,118 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:00:51,369 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:53,370 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:54,024 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:00:54,024 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:00:55,371 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:57,373 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:00:59,374 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:01,375 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:03,376 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:05,376 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:07,378 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:09,160 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:01:09,161 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:01:09,382 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:11,380 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:13,381 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 
12:01:15,382 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:17,384 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:19,384 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:20,193 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:01:21,386 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:23,387 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:24,314 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:01:24,314 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:01:25,388 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:27,390 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:29,391 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:31,392 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 
12:01:33,393 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:35,395 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:37,395 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:39,396 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:39,469 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:01:39,470 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:01:41,397 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:43,398 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:45,399 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:47,400 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:49,401 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:50,287 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 
12:01:51,403 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:53,404 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:54,608 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:01:54,608 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:01:55,405 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:57,406 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:01:59,407 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:01,408 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:03,410 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:05,411 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:07,412 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:09,413 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:09,753 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:02:09,754 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:02:11,414 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:13,415 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:15,416 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:17,417 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:19,418 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:20,379 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:02:21,419 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:23,420 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:24,893 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:02:24,893 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:02:25,421 INFO Thread-8 :2100683 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:27,422 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:29,423 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:31,425 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:33,426 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:35,427 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:37,428 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:39,429 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:40,051 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:02:40,051 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:02:41,430 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:43,431 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:45,434 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:47,433 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:49,434 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:50,452 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:02:51,435 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:53,436 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:55,195 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:02:55,195 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:02:55,437 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:57,438 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:02:59,439 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:01,440 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:03,441 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:05,442 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:07,443 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:09,444 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:10,353 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:03:10,353 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:03:11,446 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:13,447 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:15,448 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:17,451 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:19,452 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 
12:03:20,524 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:03:21,453 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:23,454 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:25,456 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:25,492 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:03:25,492 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:03:27,457 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:29,458 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:32,459 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:34,460 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:36,461 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:38,462 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 
12:03:40,463 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:40,635 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:03:40,636 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:03:42,464 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:44,466 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:46,471 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:48,472 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:50,473 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:50,617 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:03:52,474 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:54,477 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:55,772 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:03:55,773 DEBUG SenderThread:2100683 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 12:03:56,478 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:03:58,479 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:00,481 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:02,482 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:04,483 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:06,484 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:08,486 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:10,487 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:10,957 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:04:10,958 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:04:12,488 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:14,489 INFO Thread-8 :2100683 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:16,490 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:18,492 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:20,493 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:20,699 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:04:22,494 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:24,495 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:26,109 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:04:26,110 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:04:26,496 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:28,497 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:30,498 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:32,499 INFO Thread-8 :2100683 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:34,500 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:36,501 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:38,502 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:40,503 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:41,254 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:04:41,255 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:04:42,504 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:44,506 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:46,507 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:04:50,783 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:04:56,392 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:04:56,393 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 
12:05:11,600 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:05:11,600 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:05:15,522 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:17,523 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:19,524 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:20,857 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:05:21,525 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:23,527 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:25,528 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:26,847 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:05:26,852 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:05:27,529 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:29,530 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:31,531 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:33,532 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:35,533 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:37,534 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:39,535 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:41,536 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:41,996 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:05:41,997 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:05:43,537 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:45,538 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:47,539 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 
12:05:49,543 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:50,941 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:05:51,544 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:53,546 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:55,547 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:05:57,135 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:05:57,135 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:06:12,274 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:06:12,274 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:06:21,024 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:06:27,503 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:06:27,503 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:06:33,565 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:35,566 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:37,567 INFO Thread-8 
:2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:39,573 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:41,574 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:42,717 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:06:42,717 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:06:43,575 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:45,577 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:47,578 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:49,579 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:51,101 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:06:51,580 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:53,581 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:55,582 INFO Thread-8 
:2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:57,583 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:06:57,855 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:06:57,856 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:06:59,584 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:01,585 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:03,586 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:07,588 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:09,589 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:11,590 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:13,004 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:07:13,005 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:07:13,591 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:15,593 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:17,594 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:19,595 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:21,189 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:07:21,597 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:23,598 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:25,599 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:27,600 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:28,147 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:07:28,147 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:07:29,601 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:31,602 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:33,603 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:36,605 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:38,606 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:40,607 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:42,608 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:43,294 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:07:43,294 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:07:44,609 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:46,610 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:48,611 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:50,612 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:51,271 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:07:52,613 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:54,614 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:56,615 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:07:58,432 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:07:58,432 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:08:13,567 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:08:13,567 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:08:21,343 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:08:28,719 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:08:28,719 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:08:40,633 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:08:44,070 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:08:44,070 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:08:48,636 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:08:51,414 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:08:57,640 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:08:59,415 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:08:59,416 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:09:06,644 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:09:14,648 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:09:14,822 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:09:14,822 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:09:21,485 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:09:23,652 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:09:29,654 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:09:30,071 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:09:30,072 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:09:31,655 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:09:33,656 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:09:42,660 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:09:45,442 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:09:45,443 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:09:51,559 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:10:00,687 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:10:00,687 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:10:15,852 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:10:15,852 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:10:20,675 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:10:21,635 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:10:28,679 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:10:31,135 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:10:31,136 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:10:31,680 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:10:46,300 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:10:46,300 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:10:51,711 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:11:01,449 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:11:01,450 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:11:16,905 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:11:16,905 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:11:21,790 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:11:32,041 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:11:32,041 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:11:47,175 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:11:47,176 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:11:51,866 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:12:02,312 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:12:02,313 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:12:17,454 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:12:17,454 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:12:21,945 DEBUG SenderThread:2100683 [sender.py:send():234] send: 
stats +2022-07-30 12:12:32,586 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:12:32,586 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:12:47,720 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:12:47,720 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:12:52,021 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:13:02,858 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:13:02,858 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:13:18,016 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:13:18,016 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:13:22,094 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:13:33,162 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:13:33,162 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:13:48,301 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:13:48,302 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:13:52,166 DEBUG SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:14:03,438 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:14:03,438 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:14:10,811 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:14:12,147 DEBUG SenderThread:2100683 [sender.py:send():234] send: telemetry +2022-07-30 12:14:12,147 DEBUG SenderThread:2100683 [sender.py:send():234] send: exit +2022-07-30 12:14:12,147 INFO SenderThread:2100683 [sender.py:send_exit():366] handling exit code: 1 +2022-07-30 12:14:12,148 INFO SenderThread:2100683 [sender.py:send_exit():368] handling runtime: 1012 +2022-07-30 12:14:12,148 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:14:12,149 INFO SenderThread:2100683 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 12:14:12,149 INFO SenderThread:2100683 [sender.py:send_exit():374] send defer +2022-07-30 12:14:12,149 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:12,150 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:14:12,150 INFO HandlerThread:2100683 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-30 12:14:12,150 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: defer +2022-07-30 12:14:12,150 INFO SenderThread:2100683 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-30 12:14:12,150 INFO SenderThread:2100683 [sender.py:transition_state():387] send defer: 1 +2022-07-30 12:14:12,151 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:14:12,151 INFO HandlerThread:2100683 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-30 12:14:12,215 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: defer +2022-07-30 12:14:12,215 INFO SenderThread:2100683 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-30 12:14:12,215 INFO SenderThread:2100683 [sender.py:transition_state():387] send defer: 2 +2022-07-30 12:14:12,215 DEBUG 
SenderThread:2100683 [sender.py:send():234] send: stats +2022-07-30 12:14:12,216 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:14:12,216 INFO HandlerThread:2100683 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-30 12:14:12,216 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: defer +2022-07-30 12:14:12,216 INFO SenderThread:2100683 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-30 12:14:12,216 INFO SenderThread:2100683 [sender.py:transition_state():387] send defer: 3 +2022-07-30 12:14:12,216 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:14:12,216 INFO HandlerThread:2100683 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-30 12:14:12,216 DEBUG SenderThread:2100683 [sender.py:send():234] send: summary +2022-07-30 12:14:12,217 INFO SenderThread:2100683 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 12:14:12,217 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: defer +2022-07-30 12:14:12,217 INFO SenderThread:2100683 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-30 12:14:12,217 INFO SenderThread:2100683 [sender.py:transition_state():387] send defer: 4 +2022-07-30 12:14:12,217 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:14:12,217 INFO HandlerThread:2100683 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-30 12:14:12,217 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: defer +2022-07-30 12:14:12,217 INFO SenderThread:2100683 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-30 12:14:12,253 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:14:12,377 INFO SenderThread:2100683 [sender.py:transition_state():387] send defer: 5 +2022-07-30 
12:14:12,377 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:12,377 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:14:12,377 INFO HandlerThread:2100683 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-30 12:14:12,378 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: defer +2022-07-30 12:14:12,378 INFO SenderThread:2100683 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-30 12:14:12,378 INFO SenderThread:2100683 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-30 12:14:12,479 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:14:12,812 INFO Thread-8 :2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/config.yaml +2022-07-30 12:14:12,813 INFO SenderThread:2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/wandb-summary.json +2022-07-30 12:14:12,813 INFO SenderThread:2100683 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:14:12,813 INFO SenderThread:2100683 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files +2022-07-30 12:14:12,813 INFO SenderThread:2100683 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/config.yaml config.yaml +2022-07-30 12:14:12,814 INFO SenderThread:2100683 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/diff.patch diff.patch +2022-07-30 12:14:12,814 INFO SenderThread:2100683 [dir_watcher.py:finish():327] scan save: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/requirements.txt requirements.txt +2022-07-30 12:14:12,814 INFO SenderThread:2100683 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log output.log +2022-07-30 12:14:12,814 INFO SenderThread:2100683 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/wandb-summary.json wandb-summary.json +2022-07-30 12:14:12,814 INFO SenderThread:2100683 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/wandb-metadata.json wandb-metadata.json +2022-07-30 12:14:12,814 INFO SenderThread:2100683 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-30 12:14:12,814 INFO SenderThread:2100683 [sender.py:transition_state():387] send defer: 6 +2022-07-30 12:14:12,814 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:12,816 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:14:12,816 INFO HandlerThread:2100683 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-30 12:14:12,819 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: defer +2022-07-30 12:14:12,819 INFO SenderThread:2100683 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-30 12:14:12,819 INFO SenderThread:2100683 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 12:14:12,916 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:14:12,917 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:13,019 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: poll_exit 
+2022-07-30 12:14:13,019 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:13,121 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:14:13,121 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:13,223 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:14:13,223 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:13,295 INFO Thread-15 :2100683 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/requirements.txt +2022-07-30 12:14:13,325 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:14:13,325 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:13,330 INFO Thread-17 :2100683 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/wandb-summary.json +2022-07-30 12:14:13,335 INFO Thread-14 :2100683 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/config.yaml +2022-07-30 12:14:13,426 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:14:13,427 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:13,503 INFO Thread-16 :2100683 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/files/output.log +2022-07-30 12:14:13,528 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:14:13,528 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:13,630 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] 
handle_request: poll_exit +2022-07-30 12:14:13,630 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:13,704 INFO Thread-7 :2100683 [sender.py:transition_state():387] send defer: 7 +2022-07-30 12:14:13,705 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:14:13,705 INFO HandlerThread:2100683 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-30 12:14:13,705 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: defer +2022-07-30 12:14:13,705 INFO SenderThread:2100683 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-30 12:14:13,732 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:14:14,143 INFO SenderThread:2100683 [sender.py:transition_state():387] send defer: 8 +2022-07-30 12:14:14,143 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:14,144 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:14:14,144 INFO HandlerThread:2100683 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-30 12:14:14,144 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: defer +2022-07-30 12:14:14,144 INFO SenderThread:2100683 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-30 12:14:14,144 INFO SenderThread:2100683 [sender.py:transition_state():387] send defer: 9 +2022-07-30 12:14:14,144 DEBUG SenderThread:2100683 [sender.py:send():234] send: final +2022-07-30 12:14:14,145 DEBUG SenderThread:2100683 [sender.py:send():234] send: footer +2022-07-30 12:14:14,145 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:14:14,145 INFO HandlerThread:2100683 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-30 12:14:14,145 DEBUG SenderThread:2100683 [sender.py:send_request():248] 
send_request: defer +2022-07-30 12:14:14,145 INFO SenderThread:2100683 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-30 12:14:14,245 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:14:14,246 DEBUG SenderThread:2100683 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:14:14,246 INFO SenderThread:2100683 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 12:14:14,513 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: get_summary +2022-07-30 12:14:14,514 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-30 12:14:14,515 DEBUG HandlerThread:2100683 [handler.py:handle_request():130] handle_request: shutdown +2022-07-30 12:14:14,515 INFO HandlerThread:2100683 [handler.py:finish():731] shutting down handler +2022-07-30 12:14:15,145 INFO WriterThread:2100683 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/run-1xckv47v.wandb +2022-07-30 12:14:15,512 INFO SenderThread:2100683 [sender.py:finish():1070] shutting down sender +2022-07-30 12:14:15,512 INFO SenderThread:2100683 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 12:14:15,512 INFO SenderThread:2100683 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 12:14:15,516 INFO MainThread:2100683 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220730_115718-1xckv47v/logs/debug.log b/wandb/run-20220730_115718-1xckv47v/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..3a5ea67cda0ea061f444b2c2b3936eeb580fd16e --- /dev/null +++ b/wandb/run-20220730_115718-1xckv47v/logs/debug.log @@ -0,0 +1,148 @@ +2022-07-30 11:57:18,097 INFO MainThread:2099342 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-30 11:57:18,097 INFO MainThread:2099342 
[wandb_setup.py:_flush():71] setting login settings: {} +2022-07-30 11:57:18,097 INFO MainThread:2099342 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/logs/debug.log +2022-07-30 11:57:18,097 INFO MainThread:2099342 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_115718-1xckv47v/logs/debug-internal.log +2022-07-30 11:57:18,097 INFO MainThread:2099342 [wandb_init.py:init():404] calling init triggers +2022-07-30 11:57:18,097 INFO MainThread:2099342 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-30 11:57:18,097 INFO MainThread:2099342 [wandb_init.py:init():460] starting backend +2022-07-30 11:57:18,097 INFO MainThread:2099342 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-30 11:57:18,155 INFO MainThread:2099342 [backend.py:ensure_launched():216] starting backend process... +2022-07-30 11:57:18,200 INFO MainThread:2099342 [backend.py:ensure_launched():221] started backend process with pid: 2100683 +2022-07-30 11:57:18,202 INFO MainThread:2099342 [wandb_init.py:init():469] backend started and connected +2022-07-30 11:57:18,218 INFO MainThread:2099342 [wandb_init.py:init():533] updated telemetry +2022-07-30 11:57:18,329 INFO MainThread:2099342 [wandb_init.py:init():563] communicating current version +2022-07-30 11:57:19,059 INFO MainThread:2099342 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! 
To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-30 11:57:19,059 INFO MainThread:2099342 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-30 11:57:19,249 INFO MainThread:2099342 [wandb_init.py:init():606] starting run threads in backend +2022-07-30 11:57:21,703 INFO MainThread:2099342 [wandb_run.py:_console_start():1810] atexit reg +2022-07-30 11:57:21,703 INFO MainThread:2099342 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-30 11:57:21,704 INFO MainThread:2099342 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-30 11:57:21,706 INFO MainThread:2099342 [wandb_run.py:_redirect():1745] Redirects installed. +2022-07-30 11:57:21,706 INFO MainThread:2099342 [wandb_init.py:init():633] run started, returning control to user process +2022-07-30 12:14:09,859 INFO MainThread:2099342 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-30 12:14:09,864 INFO MainThread:2099342 [wandb_run.py:_restore():1752] restore +2022-07-30 12:14:12,151 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 74351 +} + +2022-07-30 12:14:12,378 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 74351 +} + +2022-07-30 12:14:12,815 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 352530 +} + +2022-07-30 12:14:12,917 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 352530 +} + +2022-07-30 12:14:13,020 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got 
exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 352530 + total_bytes: 352530 +} + +2022-07-30 12:14:13,122 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 352530 + total_bytes: 352530 +} + +2022-07-30 12:14:13,224 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 352530 + total_bytes: 352530 +} + +2022-07-30 12:14:13,325 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 352530 + total_bytes: 352530 +} + +2022-07-30 12:14:13,427 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 352530 + total_bytes: 352530 +} + +2022-07-30 12:14:13,529 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 352530 + total_bytes: 352530 +} + +2022-07-30 12:14:13,631 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 352530 + total_bytes: 352530 +} + +2022-07-30 12:14:14,144 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 352530 + total_bytes: 352530 +} + +2022-07-30 12:14:14,512 INFO MainThread:2099342 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 352530 + total_bytes: 352530 +} +local_info { +} + +2022-07-30 12:14:16,128 INFO MainThread:2099342 
[wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220730_115718-1xckv47v/run-1xckv47v.wandb b/wandb/run-20220730_115718-1xckv47v/run-1xckv47v.wandb new file mode 100644 index 0000000000000000000000000000000000000000..f723f166353ea5a6b6289dca3f792c47d7ff34a6 --- /dev/null +++ b/wandb/run-20220730_115718-1xckv47v/run-1xckv47v.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f90f5516ba68c809b54d5aed385c1e6a47f9106bde8abf07065bde01cf1e335 +size 409339 diff --git a/wandb/run-20220730_122457-1iypf07q/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220730_122457-1iypf07q/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..d846a57f2375127bc169f9735151ad564083efac --- /dev/null +++ b/wandb/run-20220730_122457-1iypf07q/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1605 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220730_122457-1iypf07q/files/config.yaml b/wandb/run-20220730_122457-1iypf07q/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aa1ea04d345a61a265eace828abf4596b653070e --- /dev/null +++ b/wandb/run-20220730_122457-1iypf07q/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659183897 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220730_122457-1iypf07q/files/diff.patch b/wandb/run-20220730_122457-1iypf07q/files/diff.patch new file mode 100644 index 0000000000000000000000000000000000000000..39b46e1762f7e4b4f330f4ff487b597cc5496642 --- /dev/null +++ b/wandb/run-20220730_122457-1iypf07q/files/diff.patch @@ -0,0 +1,10 @@ +diff --git a/.gitattributes b/.gitattributes +index 755356a..f0eef4b 100644 +--- a/.gitattributes ++++ b/.gitattributes +@@ -29,3 +29,4 @@ 
saved_model/**/* filter=lfs diff=lfs merge=lfs -text + *.zip filter=lfs diff=lfs merge=lfs -text + *.zstandard filter=lfs diff=lfs merge=lfs -text + *tfevents* filter=lfs diff=lfs merge=lfs -text ++*.wandb filter=lfs diff=lfs merge=lfs -text +\ No newline at end of file diff --git a/wandb/run-20220730_122457-1iypf07q/files/output.log b/wandb/run-20220730_122457-1iypf07q/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..9e568a88c541956990cb1195ed4e86fcb67e0313 --- /dev/null +++ b/wandb/run-20220730_122457-1iypf07q/files/output.log @@ -0,0 +1,1247 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul30_12-24-49_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, 
+logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=20, +per_device_train_batch_size=20, +precision=full_mixed, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) + + +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:07<00:00, 3.86s/it] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:01<00:00, 
2.09it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + 
"contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at 
/home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_q', 'kernel'), ('project_hid', 'kernel'), ('quantizer', 'weight_proj', 'kernel'), ('project_q', 'bias'), ('quantizer', 'weight_proj', 'bias'), ('quantizer', 'codevectors'), ('project_hid', 'bias')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
+Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'kernel'), ('lm_head', 'bias')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 0%|▌ | 29/9523 [00:01<06:34, 24.05ex/s] +removing punctuation from train split #1: 0%|▋ | 34/9523 [00:01<05:14, 30.21ex/s] +removing punctuation from train split #2: 0%|▌ | 28/9523 [00:01<07:10, 22.05ex/s] +removing punctuation from train split #3: 0%|▌ | 26/9523 [00:00<04:36, 34.30ex/s] +removing punctuation from train split #4: 0%|▌ | 25/9523 [00:00<04:38, 34.12ex/s] +removing punctuation from train split #5: 0%|▍ | 20/9523 [00:00<05:22, 29.45ex/s] +removing punctuation from train split #6: 0%|▍ | 23/9523 [00:00<04:59, 31.74ex/s] +removing punctuation from train split #7: 0%|▍ | 18/9523 [00:00<05:22, 29.46ex/s] +removing punctuation from train split #8: 0%|▎ | 16/9523 [00:00<05:15, 30.18ex/s] +removing punctuation from train split #9: 0%|▎ | 17/9523 [00:00<05:26, 29.09ex/s] +removing punctuation from train split #10: 0%|▍ | 21/9523 [00:00<03:54, 40.58ex/s] +removing punctuation from train split #11: 0%|▎ | 14/9523 [00:00<05:20, 29.64ex/s] +removing punctuation from train split #12: 0%|▏ | 9/9522 [00:00<06:47, 23.37ex/s] +removing punctuation from train split #13: 0%|▎ | 12/9522 [00:00<05:13, 30.37ex/s] +removing punctuation from train split #14: 0%|▎ | 15/9522 [00:00<03:27, 45.77ex/s] +removing punctuation from train split #15: 0%|▏ | 9/9522 [00:00<04:15, 37.30ex/s] +removing punctuation from train split #16: 0%|▏ | 8/9522 [00:00<04:11, 37.86ex/s] +removing punctuation from 
train split #17: 0%|▏ | 7/9522 [00:00<04:49, 32.82ex/s] +removing punctuation from train split #18: 0%| | 4/9522 [00:00<04:04, 38.90ex/s] +removing punctuation from train split #19: 0%| | 3/9522 [00:00<07:51, 20.21ex/s] +removing punctuation from train split #20: 0%| | 0/9522 [00:00 to the vocabulary +Adding to the vocabulary +2022-07-30 12:37:31.652469: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2022-07-30 12:37:31.652528: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303) +/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +WARNING:huggingface_hub.repository:/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +run_flax_speech_recognition_ctc.py:337: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) +INFO:__main__:***** Running training ***** +INFO:__main__: Num examples = 302693 +INFO:__main__: Num Epochs = 40 +INFO:__main__: Instantaneous batch size per device = 20 +INFO:__main__: Num gradient accumulation steps = 1 +INFO:__main__: Total train batch size (w. parallel & distributed) = 160 +INFO:__main__: Total optimization steps = 75640 +INFO:__main__: Gradient checkpointing: True +INFO:__main__: Use scan: False +INFO:__main__: Fuse matmuls: False +Epoch ... 
(1/40): 0%| | 0/40 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) + File "/data/flax/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 162, in reraise_with_filtered_traceback + return fun(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2158, in cache_miss + out_tree, out_flat = f_pmapped_(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/_src/api.py", line 2034, in pmap_f + out = pxla.xla_pmap( + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2022, in bind + return map_bind(self, fun, *args, **params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2054, in map_bind + outs = primitive.process(top_trace, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 2025, in process + return trace.process_map(self, fun, tracers, params) + File "/data/flax/lib/python3.8/site-packages/jax/core.py", line 687, in process_call + return primitive.impl(f, *tracers, **params) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 841, in xla_pmap_impl + return compiled_fun(*args) + File "/data/flax/lib/python3.8/site-packages/jax/_src/profiler.py", line 294, in wrapper + return func(*args, **kwargs) + File "/data/flax/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1656, in __call__ + out_bufs = self.xla_executable.execute_sharded_on_local_devices(input_bufs) +jax._src.traceback_util.UnfilteredStackTrace: jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: Attempting to reserve 6.77G at the bottom of memory. That was not possible. There are 8.21G free, 0B reserved, and 6.48G reservable. If fragmentation is eliminated, the maximum reservable bytes would be 8.21G, so compaction will enable this reservation. 
The nearest obstacle is at 6.48G from the bottom with size 16.0K.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). +The stack trace below excludes JAX-internal frames. +The preceding is the original exception that occurred, unmodified. +-------------------- +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "run_flax_speech_recognition_ctc.py", line 1605, in + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) +jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: Attempting to reserve 6.77G at the bottom of memory. That was not possible. There are 8.21G free, 0B reserved, and 6.48G reservable. If fragmentation is eliminated, the maximum reservable bytes would be 8.21G, so compaction will enable this reservation. The nearest obstacle is at 6.48G from the bottom with size 16.0K.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well). 
\ No newline at end of file diff --git a/wandb/run-20220730_122457-1iypf07q/files/requirements.txt b/wandb/run-20220730_122457-1iypf07q/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ef78cbdea431c3d66bc5af51443394cb7955eab --- /dev/null +++ b/wandb/run-20220730_122457-1iypf07q/files/requirements.txt @@ -0,0 +1,158 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +exceptiongroup==1.0.0rc8 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +hypothesis==6.53.0 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +jiwer==2.3.0 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 
+pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.4.0 +pygments==2.11.1 +pygtrie==2.5.0 +pyparsing==3.0.6 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220730_122457-1iypf07q/files/wandb-metadata.json b/wandb/run-20220730_122457-1iypf07q/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..caee519277882b61ef54aff75439348abe026bd7 --- /dev/null +++ b/wandb/run-20220730_122457-1iypf07q/files/wandb-metadata.json @@ -0,0 +1,67 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-30T12:25:01.338750", + "startedAt": "2022-07-30T12:24:57.754509", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + 
"--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=./", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=20", + "--per_device_eval_batch_size=20", + "--gradient_accumulation_steps=1", + "--precision=full_mixed", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220730_122457-1iypf07q/files/wandb-summary.json b/wandb/run-20220730_122457-1iypf07q/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..894a2464300a4d110d2d70632e4ce2598fa9d6a3 --- /dev/null +++ 
b/wandb/run-20220730_122457-1iypf07q/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 1026}} \ No newline at end of file diff --git a/wandb/run-20220730_122457-1iypf07q/logs/debug-internal.log b/wandb/run-20220730_122457-1iypf07q/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..44835b6950128a4427f042e6875c66e453d84a1a --- /dev/null +++ b/wandb/run-20220730_122457-1iypf07q/logs/debug-internal.log @@ -0,0 +1,584 @@ +2022-07-30 12:24:58,729 INFO MainThread:6773 [internal.py:wandb_internal():87] W&B internal server running at pid: 6773, started at: 2022-07-30 12:24:58.729025 +2022-07-30 12:24:58,730 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: check_version +2022-07-30 12:24:58,731 INFO WriterThread:6773 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/run-1iypf07q.wandb +2022-07-30 12:24:58,732 DEBUG SenderThread:6773 [sender.py:send():234] send: header +2022-07-30 12:24:58,732 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: check_version +2022-07-30 12:24:58,769 DEBUG SenderThread:6773 [sender.py:send():234] send: run +2022-07-30 12:24:58,965 INFO SenderThread:6773 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files +2022-07-30 12:24:58,965 INFO SenderThread:6773 [sender.py:_start_run_threads():804] run started: 1iypf07q with start time 1659183897 +2022-07-30 12:24:58,965 DEBUG SenderThread:6773 [sender.py:send():234] send: summary +2022-07-30 12:24:58,965 INFO SenderThread:6773 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 12:24:58,966 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: run_start +2022-07-30 12:24:59,969 INFO Thread-8 :6773 [dir_watcher.py:_on_file_created():217] file/dir created: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/wandb-summary.json +2022-07-30 12:25:01,338 DEBUG HandlerThread:6773 [meta.py:__init__():40] meta init +2022-07-30 12:25:01,338 DEBUG HandlerThread:6773 [meta.py:__init__():54] meta init done +2022-07-30 12:25:01,338 DEBUG HandlerThread:6773 [meta.py:probe():214] probe +2022-07-30 12:25:01,339 DEBUG HandlerThread:6773 [meta.py:_setup_git():204] setup git +2022-07-30 12:25:01,369 DEBUG HandlerThread:6773 [meta.py:_setup_git():211] setup git done +2022-07-30 12:25:01,369 DEBUG HandlerThread:6773 [meta.py:_save_code():92] save code +2022-07-30 12:25:01,380 DEBUG HandlerThread:6773 [meta.py:_save_code():113] save code done +2022-07-30 12:25:01,380 DEBUG HandlerThread:6773 [meta.py:_save_patches():130] save patches +2022-07-30 12:25:01,488 DEBUG HandlerThread:6773 [meta.py:_save_patches():172] save patches done +2022-07-30 12:25:01,489 DEBUG HandlerThread:6773 [meta.py:_save_pip():58] save pip +2022-07-30 12:25:01,489 DEBUG HandlerThread:6773 [meta.py:_save_pip():72] save pip done +2022-07-30 12:25:01,489 DEBUG HandlerThread:6773 [meta.py:probe():252] probe done +2022-07-30 12:25:01,536 DEBUG SenderThread:6773 [sender.py:send():234] send: files +2022-07-30 12:25:01,536 INFO SenderThread:6773 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-30 12:25:01,536 INFO SenderThread:6773 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-30 12:25:01,537 INFO SenderThread:6773 [sender.py:_save_file():939] saving file diff.patch with policy now +2022-07-30 12:25:01,542 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:25:01,543 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:25:01,994 INFO Thread-8 :6773 [dir_watcher.py:_on_file_created():217] file/dir created: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:01,995 INFO Thread-8 :6773 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/requirements.txt +2022-07-30 12:25:01,995 INFO Thread-8 :6773 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/diff.patch +2022-07-30 12:25:01,995 INFO Thread-8 :6773 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/code/run_flax_speech_recognition_ctc.py +2022-07-30 12:25:01,995 INFO Thread-8 :6773 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/wandb-metadata.json +2022-07-30 12:25:01,995 INFO Thread-8 :6773 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/code +2022-07-30 12:25:02,018 INFO Thread-13 :6773 [upload_job.py:push():137] Uploaded file /tmp/tmpzmsvph2bwandb/3oeo3rpo-diff.patch +2022-07-30 12:25:02,045 INFO Thread-11 :6773 [upload_job.py:push():137] Uploaded file /tmp/tmpzmsvph2bwandb/110yn7af-wandb-metadata.json +2022-07-30 12:25:02,282 INFO Thread-12 :6773 [upload_job.py:push():137] Uploaded file /tmp/tmpzmsvph2bwandb/2cmn72j4-code/run_flax_speech_recognition_ctc.py +2022-07-30 12:25:03,995 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:05,996 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:11,998 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:13,999 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:16,000 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:16,686 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:25:16,687 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:25:18,001 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:20,002 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:22,003 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:24,003 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:26,004 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:28,005 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:29,416 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:25:30,006 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:31,818 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:25:31,818 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:25:32,007 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:34,008 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:42,011 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:46,955 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:25:46,955 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:25:58,017 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:25:59,489 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:26:02,091 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:26:02,092 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:26:17,226 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:26:17,226 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:26:29,563 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:26:32,031 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:26:32,381 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:26:32,382 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:26:39,034 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:26:41,035 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:26:43,036 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:26:45,037 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:26:47,038 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:26:47,547 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:26:47,547 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:26:49,038 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:26:51,039 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:26:53,040 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 
12:26:55,041 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:26:57,042 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:26:59,043 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:26:59,634 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:27:02,685 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:27:02,686 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:27:11,048 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:27:13,049 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:27:15,050 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:27:17,826 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:27:17,826 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:27:29,705 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:27:32,976 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:27:32,977 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:27:48,225 DEBUG HandlerThread:6773 [handler.py:handle_request():130] 
handle_request: stop_status +2022-07-30 12:27:48,225 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:27:51,066 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:27:53,067 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:27:55,068 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:27:57,069 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:27:59,070 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:27:59,777 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:28:01,071 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:03,072 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:03,369 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:28:03,370 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:28:05,073 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:07,074 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:09,074 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:11,075 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:13,077 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:16,078 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:18,079 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:18,515 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:28:18,515 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:28:20,080 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:22,080 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:24,081 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:26,082 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:28,083 
INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:29,851 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:28:30,084 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:32,085 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:33,651 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:28:33,652 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:28:34,087 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:36,088 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:38,089 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:40,089 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:42,090 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:44,091 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:46,092 INFO Thread-8 :6773 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:48,093 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:48,789 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:28:48,789 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:28:50,094 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:52,095 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:54,096 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:56,097 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:58,098 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:28:59,924 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:29:00,099 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:02,100 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:03,947 DEBUG HandlerThread:6773 [handler.py:handle_request():130] 
handle_request: stop_status +2022-07-30 12:29:03,947 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:29:04,101 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:06,102 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:08,103 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:10,105 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:12,106 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:14,107 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:16,108 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:18,109 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:19,085 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:29:19,085 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:29:20,110 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:22,111 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:24,112 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:26,113 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:28,114 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:29,999 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:29:30,115 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:32,117 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:34,118 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:34,227 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:29:34,227 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:29:36,119 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:38,121 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:40,122 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:42,123 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:44,124 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:46,125 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:48,127 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:49,373 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:29:49,373 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:29:50,128 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:52,129 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:54,130 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:56,131 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:29:58,132 INFO Thread-8 :6773 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:00,080 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:30:00,134 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:02,135 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:04,136 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:04,514 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:30:04,515 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:30:06,137 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:08,138 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:10,139 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:12,141 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:14,142 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:16,143 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:18,144 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:19,689 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:30:19,689 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:30:20,145 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:22,146 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:24,148 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:26,149 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:28,150 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:30,151 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:30,155 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:30:32,153 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:34,154 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:34,827 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:30:34,827 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:30:36,155 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:38,156 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:40,157 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:42,158 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:44,159 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:46,161 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:48,162 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:49,979 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:30:49,980 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:30:50,165 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 
12:30:52,164 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:54,165 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:56,166 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:30:58,168 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:00,169 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:00,236 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:31:02,170 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:04,172 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:05,139 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:31:05,139 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:31:06,173 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:08,174 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:10,175 INFO Thread-8 :6773 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:12,176 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:14,177 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:16,179 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:18,180 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:20,181 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:20,283 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:31:20,283 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:31:22,182 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:24,183 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:26,184 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:28,185 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:30,186 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:30,317 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:31:32,187 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:34,188 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:35,447 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:31:35,447 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:31:36,189 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:38,190 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:40,191 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:42,192 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:44,194 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:47,195 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:49,196 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:50,586 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:31:50,587 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:31:51,198 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:53,199 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:55,200 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:57,202 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:31:59,203 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:00,399 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:32:01,204 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:03,205 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:05,206 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:05,722 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:32:05,722 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:32:07,207 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:09,209 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:11,210 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:13,212 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:15,213 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:17,215 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:19,215 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:20,859 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:32:20,859 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:32:21,217 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 
12:32:23,218 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:25,219 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:27,220 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:29,221 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:30,481 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:32:31,221 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:33,222 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:35,223 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:36,013 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:32:36,014 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:32:37,229 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:39,230 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:41,231 INFO Thread-8 :6773 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:43,232 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:45,234 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:47,235 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:49,236 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:51,168 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:32:51,168 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:32:51,237 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:53,238 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:55,238 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:57,239 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:32:59,241 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:33:00,557 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:33:01,241 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:33:03,242 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:33:06,308 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:33:06,308 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:33:21,443 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:33:21,443 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:33:30,634 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:33:36,577 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:33:36,578 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:33:51,716 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:33:51,716 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:33:53,261 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:33:55,263 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:33:57,264 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:33:59,265 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:00,709 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:34:01,266 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:03,267 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:05,268 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:06,865 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:34:06,865 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:34:07,269 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:09,270 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:11,271 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:13,272 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:15,273 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:17,274 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:19,275 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:21,276 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:22,007 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:34:22,007 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:34:23,277 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:27,279 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:29,280 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:30,782 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:34:31,281 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:33,281 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:35,282 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:34:37,139 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:34:37,140 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:34:52,288 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:34:52,288 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:35:00,856 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:35:07,441 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:35:07,442 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:35:09,297 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:11,298 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:13,299 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:15,300 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:17,300 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:19,301 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:21,302 INFO Thread-8 :6773 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:22,661 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:35:22,662 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:35:23,303 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:25,304 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:27,305 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:29,306 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:30,934 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:35:31,307 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:33,308 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:35,309 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:37,309 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:37,819 DEBUG HandlerThread:6773 [handler.py:handle_request():130] 
handle_request: stop_status +2022-07-30 12:35:37,820 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:35:39,311 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:41,312 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:43,313 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:45,314 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:47,315 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:49,315 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:51,316 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:52,958 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:35:52,958 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:35:53,317 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:55,318 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:57,319 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:35:59,320 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:01,018 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:36:01,321 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:03,322 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:05,323 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:07,324 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:08,111 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:36:08,112 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:36:09,326 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:11,326 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:13,327 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:15,328 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:17,329 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:19,330 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:21,331 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:23,261 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:36:23,262 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:36:23,333 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:25,333 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:27,334 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:30,335 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:31,093 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:36:32,336 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:36:38,404 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:36:38,404 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:36:53,537 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:36:53,537 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:37:01,177 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:37:08,689 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:37:08,690 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:37:14,353 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:37:18,355 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:37:22,357 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:37:23,948 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:37:23,948 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:37:26,358 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:37:30,360 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:37:31,256 DEBUG 
SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:37:32,361 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:37:36,363 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:37:38,364 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:37:39,151 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:37:39,151 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:37:41,365 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:37:47,368 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:37:54,451 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:37:54,451 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:38:01,333 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:38:09,730 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:38:09,730 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:38:24,900 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:38:24,900 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:38:26,384 INFO Thread-8 :6773 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:38:31,412 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:38:34,387 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:38:36,388 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:38:40,067 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:38:40,067 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:38:55,237 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:38:55,238 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:39:01,491 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:39:10,390 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:39:10,390 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:39:25,522 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:39:25,523 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:39:31,568 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:39:40,661 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:39:40,661 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:39:55,794 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:39:55,795 DEBUG 
SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:40:01,642 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:40:10,937 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:40:10,937 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:40:26,071 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:40:26,071 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:40:31,715 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:40:41,207 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:40:41,207 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:40:56,343 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:40:56,344 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:41:01,785 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:41:11,484 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:41:11,484 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:41:26,638 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:41:26,639 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:41:31,856 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:41:41,777 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:41:41,777 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:41:56,911 DEBUG HandlerThread:6773 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:41:56,911 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:42:01,934 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:42:03,472 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:42:05,135 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:05,135 DEBUG SenderThread:6773 [sender.py:send():234] send: telemetry +2022-07-30 12:42:05,135 DEBUG SenderThread:6773 [sender.py:send():234] send: exit +2022-07-30 12:42:05,135 INFO SenderThread:6773 [sender.py:send_exit():366] handling exit code: 1 +2022-07-30 12:42:05,136 INFO SenderThread:6773 [sender.py:send_exit():368] handling runtime: 1026 +2022-07-30 12:42:05,136 INFO SenderThread:6773 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 12:42:05,136 INFO SenderThread:6773 [sender.py:send_exit():374] send defer +2022-07-30 12:42:05,136 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:42:05,137 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:42:05,137 INFO HandlerThread:6773 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-30 12:42:05,138 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: defer +2022-07-30 12:42:05,138 INFO SenderThread:6773 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-30 12:42:05,138 INFO SenderThread:6773 [sender.py:transition_state():387] send defer: 1 +2022-07-30 12:42:05,138 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:42:05,138 INFO HandlerThread:6773 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-30 12:42:05,142 DEBUG 
SenderThread:6773 [sender.py:send_request():248] send_request: defer +2022-07-30 12:42:05,142 INFO SenderThread:6773 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-30 12:42:05,142 INFO SenderThread:6773 [sender.py:transition_state():387] send defer: 2 +2022-07-30 12:42:05,142 DEBUG SenderThread:6773 [sender.py:send():234] send: stats +2022-07-30 12:42:05,143 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:42:05,143 INFO HandlerThread:6773 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-30 12:42:05,143 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: defer +2022-07-30 12:42:05,143 INFO SenderThread:6773 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-30 12:42:05,143 INFO SenderThread:6773 [sender.py:transition_state():387] send defer: 3 +2022-07-30 12:42:05,143 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:42:05,143 INFO HandlerThread:6773 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-30 12:42:05,143 DEBUG SenderThread:6773 [sender.py:send():234] send: summary +2022-07-30 12:42:05,144 INFO SenderThread:6773 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 12:42:05,144 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: defer +2022-07-30 12:42:05,144 INFO SenderThread:6773 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-30 12:42:05,144 INFO SenderThread:6773 [sender.py:transition_state():387] send defer: 4 +2022-07-30 12:42:05,144 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:42:05,144 INFO HandlerThread:6773 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-30 12:42:05,144 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: defer +2022-07-30 12:42:05,144 INFO SenderThread:6773 [sender.py:send_request_defer():383] 
handle sender defer: 4 +2022-07-30 12:42:05,239 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:05,303 INFO SenderThread:6773 [sender.py:transition_state():387] send defer: 5 +2022-07-30 12:42:05,303 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:42:05,303 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:42:05,303 INFO HandlerThread:6773 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-30 12:42:05,304 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: defer +2022-07-30 12:42:05,304 INFO SenderThread:6773 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-30 12:42:05,304 INFO SenderThread:6773 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-30 12:42:05,404 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:05,472 INFO Thread-8 :6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:42:05,473 INFO SenderThread:6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/config.yaml +2022-07-30 12:42:05,473 INFO SenderThread:6773 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/wandb-summary.json +2022-07-30 12:42:05,473 INFO SenderThread:6773 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files +2022-07-30 12:42:05,473 INFO SenderThread:6773 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/config.yaml config.yaml +2022-07-30 12:42:05,474 INFO SenderThread:6773 [dir_watcher.py:finish():327] scan save: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/diff.patch diff.patch +2022-07-30 12:42:05,474 INFO SenderThread:6773 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/requirements.txt requirements.txt +2022-07-30 12:42:05,474 INFO SenderThread:6773 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log output.log +2022-07-30 12:42:05,474 INFO SenderThread:6773 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/wandb-summary.json wandb-summary.json +2022-07-30 12:42:05,474 INFO SenderThread:6773 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/wandb-metadata.json wandb-metadata.json +2022-07-30 12:42:05,477 INFO SenderThread:6773 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-30 12:42:05,477 INFO SenderThread:6773 [sender.py:transition_state():387] send defer: 6 +2022-07-30 12:42:05,477 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:42:05,478 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:42:05,480 INFO HandlerThread:6773 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-30 12:42:05,483 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: defer +2022-07-30 12:42:05,484 INFO SenderThread:6773 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-30 12:42:05,484 INFO SenderThread:6773 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 12:42:05,582 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:05,582 DEBUG SenderThread:6773 
[sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:42:05,683 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:05,683 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:42:05,785 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:05,785 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:42:05,886 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:05,886 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:42:05,932 INFO Thread-17 :6773 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/wandb-summary.json +2022-07-30 12:42:05,943 INFO Thread-14 :6773 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/config.yaml +2022-07-30 12:42:05,963 INFO Thread-15 :6773 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/requirements.txt +2022-07-30 12:42:05,987 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:05,988 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:42:06,089 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:06,089 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:42:06,154 INFO Thread-16 :6773 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/files/output.log +2022-07-30 12:42:06,190 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:06,191 DEBUG SenderThread:6773 [sender.py:send_request():248] 
send_request: poll_exit +2022-07-30 12:42:06,292 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:06,292 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:42:06,355 INFO Thread-7 :6773 [sender.py:transition_state():387] send defer: 7 +2022-07-30 12:42:06,356 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:42:06,356 INFO HandlerThread:6773 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-30 12:42:06,356 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: defer +2022-07-30 12:42:06,356 INFO SenderThread:6773 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-30 12:42:06,394 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:06,763 INFO SenderThread:6773 [sender.py:transition_state():387] send defer: 8 +2022-07-30 12:42:06,763 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:42:06,764 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:42:06,764 INFO HandlerThread:6773 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-30 12:42:06,764 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: defer +2022-07-30 12:42:06,764 INFO SenderThread:6773 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-30 12:42:06,764 INFO SenderThread:6773 [sender.py:transition_state():387] send defer: 9 +2022-07-30 12:42:06,765 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: defer +2022-07-30 12:42:06,765 INFO HandlerThread:6773 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-30 12:42:06,765 DEBUG SenderThread:6773 [sender.py:send():234] send: final +2022-07-30 12:42:06,765 DEBUG SenderThread:6773 [sender.py:send():234] send: footer +2022-07-30 12:42:06,765 DEBUG 
SenderThread:6773 [sender.py:send_request():248] send_request: defer +2022-07-30 12:42:06,765 INFO SenderThread:6773 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-30 12:42:06,865 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 12:42:06,865 DEBUG SenderThread:6773 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 12:42:06,865 INFO SenderThread:6773 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 12:42:07,121 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: get_summary +2022-07-30 12:42:07,122 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-30 12:42:07,123 DEBUG HandlerThread:6773 [handler.py:handle_request():130] handle_request: shutdown +2022-07-30 12:42:07,123 INFO HandlerThread:6773 [handler.py:finish():731] shutting down handler +2022-07-30 12:42:07,765 INFO WriterThread:6773 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/run-1iypf07q.wandb +2022-07-30 12:42:08,120 INFO SenderThread:6773 [sender.py:finish():1070] shutting down sender +2022-07-30 12:42:08,120 INFO SenderThread:6773 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 12:42:08,120 INFO SenderThread:6773 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 12:42:08,123 INFO MainThread:6773 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220730_122457-1iypf07q/logs/debug.log b/wandb/run-20220730_122457-1iypf07q/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..376298cce4720df6df3f79b173e869941adc502a --- /dev/null +++ b/wandb/run-20220730_122457-1iypf07q/logs/debug.log @@ -0,0 +1,148 @@ +2022-07-30 12:24:57,797 INFO MainThread:4852 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-30 12:24:57,797 INFO MainThread:4852 
[wandb_setup.py:_flush():71] setting login settings: {} +2022-07-30 12:24:57,797 INFO MainThread:4852 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/logs/debug.log +2022-07-30 12:24:57,797 INFO MainThread:4852 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_122457-1iypf07q/logs/debug-internal.log +2022-07-30 12:24:57,798 INFO MainThread:4852 [wandb_init.py:init():404] calling init triggers +2022-07-30 12:24:57,798 INFO MainThread:4852 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-30 12:24:57,798 INFO MainThread:4852 [wandb_init.py:init():460] starting backend +2022-07-30 12:24:57,798 INFO MainThread:4852 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-30 12:24:57,868 INFO MainThread:4852 [backend.py:ensure_launched():216] starting backend process... +2022-07-30 12:24:57,894 INFO MainThread:4852 [backend.py:ensure_launched():221] started backend process with pid: 6773 +2022-07-30 12:24:57,896 INFO MainThread:4852 [wandb_init.py:init():469] backend started and connected +2022-07-30 12:24:57,909 INFO MainThread:4852 [wandb_init.py:init():533] updated telemetry +2022-07-30 12:24:57,975 INFO MainThread:4852 [wandb_init.py:init():563] communicating current version +2022-07-30 12:24:58,768 INFO MainThread:4852 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! 
To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-30 12:24:58,768 INFO MainThread:4852 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-30 12:24:58,965 INFO MainThread:4852 [wandb_init.py:init():606] starting run threads in backend +2022-07-30 12:25:01,539 INFO MainThread:4852 [wandb_run.py:_console_start():1810] atexit reg +2022-07-30 12:25:01,540 INFO MainThread:4852 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-30 12:25:01,541 INFO MainThread:4852 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-30 12:25:01,543 INFO MainThread:4852 [wandb_run.py:_redirect():1745] Redirects installed. +2022-07-30 12:25:01,543 INFO MainThread:4852 [wandb_init.py:init():633] run started, returning control to user process +2022-07-30 12:42:02,970 INFO MainThread:4852 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-30 12:42:02,975 INFO MainThread:4852 [wandb_run.py:_restore():1752] restore +2022-07-30 12:42:05,137 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 74351 +} + +2022-07-30 12:42:05,303 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 74351 +} + +2022-07-30 12:42:05,481 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 336868 +} + +2022-07-30 12:42:05,582 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 336868 +} + +2022-07-30 12:42:05,684 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + 
other_count: 1 +} +pusher_stats { + uploaded_bytes: 336868 + total_bytes: 336868 +} + +2022-07-30 12:42:05,785 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 336868 + total_bytes: 336868 +} + +2022-07-30 12:42:05,887 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 336868 + total_bytes: 336868 +} + +2022-07-30 12:42:05,988 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 336868 + total_bytes: 336868 +} + +2022-07-30 12:42:06,089 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 336868 + total_bytes: 336868 +} + +2022-07-30 12:42:06,191 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 336868 + total_bytes: 336868 +} + +2022-07-30 12:42:06,293 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 336868 + total_bytes: 336868 +} + +2022-07-30 12:42:06,764 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 336868 + total_bytes: 336868 +} + +2022-07-30 12:42:07,120 INFO MainThread:4852 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 336868 + total_bytes: 336868 +} +local_info { +} + +2022-07-30 12:42:10,566 INFO MainThread:4852 [wandb_run.py:_append_files():2180] logging synced files diff --git 
a/wandb/run-20220730_122457-1iypf07q/run-1iypf07q.wandb b/wandb/run-20220730_122457-1iypf07q/run-1iypf07q.wandb new file mode 100644 index 0000000000000000000000000000000000000000..09be673e3622b0ed1eb7b9b9496b1d01a0cf630e --- /dev/null +++ b/wandb/run-20220730_122457-1iypf07q/run-1iypf07q.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6df3eec25c410cbde4963b6493592cf98914d7abc365c2e88e74249bcc895a9b +size 386029 diff --git a/wandb/run-20220730_124505-101ubxa3/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220730_124505-101ubxa3/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..d846a57f2375127bc169f9735151ad564083efac --- /dev/null +++ b/wandb/run-20220730_124505-101ubxa3/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1605 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"])) # Use same train/val ratio as NPSC + nst_train = nst["train"].train_test_split(train_size=split, seed=seed) + nst["train"] = nst_train["train"] + nst["validation"] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", 
"audio"]]) + nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in "train", "validation", "test": + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220730_124505-101ubxa3/files/config.yaml b/wandb/run-20220730_124505-101ubxa3/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c6c985261e2400770199c7090235b75fc0cad92f --- /dev/null +++ b/wandb/run-20220730_124505-101ubxa3/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659185105 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220730_124505-101ubxa3/files/diff.patch b/wandb/run-20220730_124505-101ubxa3/files/diff.patch new file mode 100644 index 0000000000000000000000000000000000000000..39b46e1762f7e4b4f330f4ff487b597cc5496642 --- /dev/null +++ b/wandb/run-20220730_124505-101ubxa3/files/diff.patch @@ -0,0 +1,10 @@ +diff --git a/.gitattributes b/.gitattributes +index 755356a..f0eef4b 100644 +--- a/.gitattributes ++++ b/.gitattributes +@@ -29,3 +29,4 @@ 
saved_model/**/* filter=lfs diff=lfs merge=lfs -text + *.zip filter=lfs diff=lfs merge=lfs -text + *.zstandard filter=lfs diff=lfs merge=lfs -text + *tfevents* filter=lfs diff=lfs merge=lfs -text ++*.wandb filter=lfs diff=lfs merge=lfs -text +\ No newline at end of file diff --git a/wandb/run-20220730_124505-101ubxa3/files/output.log b/wandb/run-20220730_124505-101ubxa3/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..746cee0d5a85642b26e86bbd49088b3816e54454 --- /dev/null +++ b/wandb/run-20220730_124505-101ubxa3/files/output.log @@ -0,0 +1,1372 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul30_12-45-01_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, 
+logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=16, +per_device_train_batch_size=16, +precision=full_mixed, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 81.78it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 
440.49it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + 
"contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at 
/home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_q', 'kernel'), ('project_hid', 'bias'), ('quantizer', 'codevectors'), ('quantizer', 'weight_proj', 'kernel'), ('project_q', 'bias'), ('quantizer', 'weight_proj', 'bias'), ('project_hid', 'kernel')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
+Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'bias'), ('lm_head', 'kernel')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 8%|████████████████▎ | 784/9523 [00:00<00:01, 7838.49ex/s] +removing punctuation from train split #1: 0%| | 0/9523 [00:00 to the vocabulary +Adding to the vocabulary +2022-07-30 12:56:30.042939: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2022-07-30 12:56:30.042991: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303) +/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +WARNING:huggingface_hub.repository:/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +run_flax_speech_recognition_ctc.py:337: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) +INFO:__main__:***** Running training ***** +INFO:__main__: Num examples = 302693 +INFO:__main__: Num Epochs = 40 +INFO:__main__: Instantaneous batch size per device = 16 +INFO:__main__: Num gradient accumulation steps = 1 +INFO:__main__: Total train batch size (w. 
parallel & distributed) = 128 +INFO:__main__: Total optimization steps = 94560 +INFO:__main__: Gradient checkpointing: True +INFO:__main__: Use scan: False +INFO:__main__: Fuse matmuls: False +Epoch ... (1/40): 0%| | 0/40 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 1517, in main + state, train_metric = p_train_step(state, batch) +ValueError: RESOURCE_EXHAUSTED: Attempting to reserve 5.81G at the bottom of memory. That was not possible. There are 8.21G free, 0B reserved, and 5.56G reservable. If fragmentation is eliminated, the maximum reservable bytes would be 8.21G, so compaction will enable this reservation. The nearest obstacle is at 5.56G from the bottom with size 19.53M.: while running replica 1 and partition 0 of a replicated computation (other replicas may have failed as well). \ No newline at end of file diff --git a/wandb/run-20220730_124505-101ubxa3/files/requirements.txt b/wandb/run-20220730_124505-101ubxa3/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ef78cbdea431c3d66bc5af51443394cb7955eab --- /dev/null +++ b/wandb/run-20220730_124505-101ubxa3/files/requirements.txt @@ -0,0 +1,158 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 +cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +exceptiongroup==1.0.0rc8 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 
+google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +hypothesis==6.53.0 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +jiwer==2.3.0 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.4.0 +pygments==2.11.1 +pygtrie==2.5.0 +pyparsing==3.0.6 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 
+typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220730_124505-101ubxa3/files/wandb-metadata.json b/wandb/run-20220730_124505-101ubxa3/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..8d31bf5a3841b1f7cf1d6e8c36cbfc3db00c9dce --- /dev/null +++ b/wandb/run-20220730_124505-101ubxa3/files/wandb-metadata.json @@ -0,0 +1,67 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-30T12:45:09.201824", + "startedAt": "2022-07-30T12:45:05.740121", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=./", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=16", + "--per_device_eval_batch_size=16", + "--gradient_accumulation_steps=1", + "--precision=full_mixed", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + "--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + 
"--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220730_124505-101ubxa3/files/wandb-summary.json b/wandb/run-20220730_124505-101ubxa3/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..39cfa6cf1cd3578c40691bd1018fb22dbebc7bef --- /dev/null +++ b/wandb/run-20220730_124505-101ubxa3/files/wandb-summary.json @@ -0,0 +1 @@ +{"train/grad_norm": 8.5, "layer_grad_norm/": {"lm_head": {"bias": 0.2890625, "kernel": 5.5}, "wav2vec2": {"encoder": {"layer_norm": {"bias": 0.21875, "scale": 0.20703125}, "layers": {"0": {"attention": {"k_proj": {"bias": 6.4849853515625e-05, "kernel": 0.0556640625}, "out_proj": {"bias": 0.017578125, "kernel": 0.1708984375}, "q_proj": {"bias": 0.0052490234375, "kernel": 0.07373046875}, "v_proj": {"bias": 0.01458740234375, "kernel": 0.12890625}}, "feed_forward": {"intermediate_dense": {"bias": 0.019775390625, "kernel": 0.263671875}, "output_dense": {"bias": 0.01055908203125, "kernel": 0.224609375}}, "final_layer_norm": {"bias": 0.0478515625, "scale": 0.0625}, "layer_norm": {"bias": 0.03271484375, "scale": 0.083984375}}, "1": {"attention": {"k_proj": {"bias": 3.62396240234375e-05, "kernel": 0.018310546875}, "out_proj": {"bias": 0.0123291015625, "kernel": 0.130859375}, "q_proj": {"bias": 0.001708984375, "kernel": 0.021240234375}, "v_proj": {"bias": 0.0169677734375, "kernel": 0.11083984375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0130615234375, "kernel": 0.189453125}, "output_dense": {"bias": 
0.0111083984375, "kernel": 0.1689453125}}, "final_layer_norm": {"bias": 0.0242919921875, "scale": 0.03125}, "layer_norm": {"bias": 0.026123046875, "scale": 0.0283203125}}, "10": {"attention": {"k_proj": {"bias": 1.4543533325195312e-05, "kernel": 0.033203125}, "out_proj": {"bias": 0.0115966796875, "kernel": 0.1015625}, "q_proj": {"bias": 0.0024566650390625, "kernel": 0.03759765625}, "v_proj": {"bias": 0.017333984375, "kernel": 0.134765625}}, "feed_forward": {"intermediate_dense": {"bias": 0.012451171875, "kernel": 0.162109375}, "output_dense": {"bias": 0.0101318359375, "kernel": 0.12890625}}, "final_layer_norm": {"bias": 0.0224609375, "scale": 0.0230712890625}, "layer_norm": {"bias": 0.03125, "scale": 0.0203857421875}}, "11": {"attention": {"k_proj": {"bias": 1.33514404296875e-05, "kernel": 0.02783203125}, "out_proj": {"bias": 0.01080322265625, "kernel": 0.1064453125}, "q_proj": {"bias": 0.001983642578125, "kernel": 0.029541015625}, "v_proj": {"bias": 0.0152587890625, "kernel": 0.1328125}}, "feed_forward": {"intermediate_dense": {"bias": 0.01031494140625, "kernel": 0.1416015625}, "output_dense": {"bias": 0.0093994140625, "kernel": 0.11572265625}}, "final_layer_norm": {"bias": 0.02099609375, "scale": 0.017333984375}, "layer_norm": {"bias": 0.02490234375, "scale": 0.0133056640625}}, "12": {"attention": {"k_proj": {"bias": 1.2159347534179688e-05, "kernel": 0.0286865234375}, "out_proj": {"bias": 0.01019287109375, "kernel": 0.0908203125}, "q_proj": {"bias": 0.001678466796875, "kernel": 0.0289306640625}, "v_proj": {"bias": 0.01239013671875, "kernel": 0.1064453125}}, "feed_forward": {"intermediate_dense": {"bias": 0.00860595703125, "kernel": 0.109375}, "output_dense": {"bias": 0.00982666015625, "kernel": 0.10888671875}}, "final_layer_norm": {"bias": 0.01507568359375, "scale": 0.0189208984375}, "layer_norm": {"bias": 0.0185546875, "scale": 0.013916015625}}, "13": {"attention": {"k_proj": {"bias": 1.2636184692382812e-05, "kernel": 0.03173828125}, "out_proj": {"bias": 
0.0108642578125, "kernel": 0.10498046875}, "q_proj": {"bias": 0.0025634765625, "kernel": 0.03515625}, "v_proj": {"bias": 0.01397705078125, "kernel": 0.12158203125}}, "feed_forward": {"intermediate_dense": {"bias": 0.00927734375, "kernel": 0.11279296875}, "output_dense": {"bias": 0.010009765625, "kernel": 0.11376953125}}, "final_layer_norm": {"bias": 0.017822265625, "scale": 0.02294921875}, "layer_norm": {"bias": 0.021240234375, "scale": 0.031494140625}}, "14": {"attention": {"k_proj": {"bias": 1.3113021850585938e-05, "kernel": 0.0255126953125}, "out_proj": {"bias": 0.010009765625, "kernel": 0.0966796875}, "q_proj": {"bias": 0.0020751953125, "kernel": 0.02587890625}, "v_proj": {"bias": 0.0107421875, "kernel": 0.09716796875}}, "feed_forward": {"intermediate_dense": {"bias": 0.0081787109375, "kernel": 0.1064453125}, "output_dense": {"bias": 0.0091552734375, "kernel": 0.1123046875}}, "final_layer_norm": {"bias": 0.0191650390625, "scale": 0.01336669921875}, "layer_norm": {"bias": 0.0162353515625, "scale": 0.024169921875}}, "15": {"attention": {"k_proj": {"bias": 8.404254913330078e-06, "kernel": 0.0185546875}, "out_proj": {"bias": 0.00958251953125, "kernel": 0.09375}, "q_proj": {"bias": 0.0011749267578125, "kernel": 0.01708984375}, "v_proj": {"bias": 0.00982666015625, "kernel": 0.0849609375}}, "feed_forward": {"intermediate_dense": {"bias": 0.006317138671875, "kernel": 0.07470703125}, "output_dense": {"bias": 0.0096435546875, "kernel": 0.10205078125}}, "final_layer_norm": {"bias": 0.010986328125, "scale": 0.0125732421875}, "layer_norm": {"bias": 0.012939453125, "scale": 0.009033203125}}, "16": {"attention": {"k_proj": {"bias": 5.990266799926758e-06, "kernel": 0.017578125}, "out_proj": {"bias": 0.00982666015625, "kernel": 0.078125}, "q_proj": {"bias": 0.00110626220703125, "kernel": 0.016357421875}, "v_proj": {"bias": 0.00982666015625, "kernel": 0.0791015625}}, "feed_forward": {"intermediate_dense": {"bias": 0.00543212890625, "kernel": 0.064453125}, "output_dense": 
{"bias": 0.009765625, "kernel": 0.0986328125}}, "final_layer_norm": {"bias": 0.00927734375, "scale": 0.013671875}, "layer_norm": {"bias": 0.01416015625, "scale": 0.0125732421875}}, "17": {"attention": {"k_proj": {"bias": 8.344650268554688e-06, "kernel": 0.019287109375}, "out_proj": {"bias": 0.01043701171875, "kernel": 0.0869140625}, "q_proj": {"bias": 0.001495361328125, "kernel": 0.0194091796875}, "v_proj": {"bias": 0.0108642578125, "kernel": 0.08544921875}}, "feed_forward": {"intermediate_dense": {"bias": 0.00531005859375, "kernel": 0.06005859375}, "output_dense": {"bias": 0.0106201171875, "kernel": 0.1025390625}}, "final_layer_norm": {"bias": 0.009033203125, "scale": 0.0107421875}, "layer_norm": {"bias": 0.015625, "scale": 0.013671875}}, "18": {"attention": {"k_proj": {"bias": 5.930662155151367e-06, "kernel": 0.015869140625}, "out_proj": {"bias": 0.0107421875, "kernel": 0.08154296875}, "q_proj": {"bias": 0.00109100341796875, "kernel": 0.01434326171875}, "v_proj": {"bias": 0.01080322265625, "kernel": 0.0830078125}}, "feed_forward": {"intermediate_dense": {"bias": 0.00482177734375, "kernel": 0.05517578125}, "output_dense": {"bias": 0.0108642578125, "kernel": 0.1064453125}}, "final_layer_norm": {"bias": 0.0079345703125, "scale": 0.0074462890625}, "layer_norm": {"bias": 0.01513671875, "scale": 0.017578125}}, "19": {"attention": {"k_proj": {"bias": 4.470348358154297e-06, "kernel": 0.0128173828125}, "out_proj": {"bias": 0.0113525390625, "kernel": 0.07763671875}, "q_proj": {"bias": 0.00091552734375, "kernel": 0.01092529296875}, "v_proj": {"bias": 0.0115966796875, "kernel": 0.08203125}}, "feed_forward": {"intermediate_dense": {"bias": 0.004638671875, "kernel": 0.05224609375}, "output_dense": {"bias": 0.011474609375, "kernel": 0.111328125}}, "final_layer_norm": {"bias": 0.0074462890625, "scale": 0.0106201171875}, "layer_norm": {"bias": 0.015625, "scale": 0.0133056640625}}, "2": {"attention": {"k_proj": {"bias": 3.528594970703125e-05, "kernel": 0.024169921875}, "out_proj": 
{"bias": 0.013427734375, "kernel": 0.1396484375}, "q_proj": {"bias": 0.001983642578125, "kernel": 0.0283203125}, "v_proj": {"bias": 0.0186767578125, "kernel": 0.1484375}}, "feed_forward": {"intermediate_dense": {"bias": 0.01422119140625, "kernel": 0.23046875}, "output_dense": {"bias": 0.0123291015625, "kernel": 0.17578125}}, "final_layer_norm": {"bias": 0.025146484375, "scale": 0.029541015625}, "layer_norm": {"bias": 0.0262451171875, "scale": 0.01904296875}}, "20": {"attention": {"k_proj": {"bias": 3.1888484954833984e-06, "kernel": 0.008056640625}, "out_proj": {"bias": 0.01214599609375, "kernel": 0.07080078125}, "q_proj": {"bias": 0.00057220458984375, "kernel": 0.00665283203125}, "v_proj": {"bias": 0.01220703125, "kernel": 0.0810546875}}, "feed_forward": {"intermediate_dense": {"bias": 0.0048828125, "kernel": 0.0517578125}, "output_dense": {"bias": 0.01220703125, "kernel": 0.119140625}}, "final_layer_norm": {"bias": 0.007720947265625, "scale": 0.008544921875}, "layer_norm": {"bias": 0.0169677734375, "scale": 0.010986328125}}, "21": {"attention": {"k_proj": {"bias": 3.6954879760742188e-06, "kernel": 0.00921630859375}, "out_proj": {"bias": 0.0128173828125, "kernel": 0.0859375}, "q_proj": {"bias": 0.00066375732421875, "kernel": 0.00799560546875}, "v_proj": {"bias": 0.01300048828125, "kernel": 0.091796875}}, "feed_forward": {"intermediate_dense": {"bias": 0.00518798828125, "kernel": 0.056396484375}, "output_dense": {"bias": 0.01300048828125, "kernel": 0.12890625}}, "final_layer_norm": {"bias": 0.008544921875, "scale": 0.010009765625}, "layer_norm": {"bias": 0.0169677734375, "scale": 0.01287841796875}}, "22": {"attention": {"k_proj": {"bias": 5.841255187988281e-06, "kernel": 0.01361083984375}, "out_proj": {"bias": 0.013916015625, "kernel": 0.09375}, "q_proj": {"bias": 0.001373291015625, "kernel": 0.012939453125}, "v_proj": {"bias": 0.01409912109375, "kernel": 0.09765625}}, "feed_forward": {"intermediate_dense": {"bias": 0.006195068359375, "kernel": 0.064453125}, 
"output_dense": {"bias": 0.01416015625, "kernel": 0.140625}}, "final_layer_norm": {"bias": 0.0101318359375, "scale": 0.0244140625}, "layer_norm": {"bias": 0.019775390625, "scale": 0.02294921875}}, "23": {"attention": {"k_proj": {"bias": 7.987022399902344e-06, "kernel": 0.01544189453125}, "out_proj": {"bias": 0.01513671875, "kernel": 0.1240234375}, "q_proj": {"bias": 0.00118255615234375, "kernel": 0.01458740234375}, "v_proj": {"bias": 0.0159912109375, "kernel": 0.1259765625}}, "feed_forward": {"intermediate_dense": {"bias": 0.006103515625, "kernel": 0.0615234375}, "output_dense": {"bias": 0.01544189453125, "kernel": 0.1435546875}}, "final_layer_norm": {"bias": 0.009521484375, "scale": 0.0093994140625}, "layer_norm": {"bias": 0.021728515625, "scale": 0.0106201171875}}, "24": {"attention": {"k_proj": {"bias": 4.708766937255859e-06, "kernel": 0.01177978515625}, "out_proj": {"bias": 0.0155029296875, "kernel": 0.1181640625}, "q_proj": {"bias": 0.0009918212890625, "kernel": 0.01123046875}, "v_proj": {"bias": 0.016357421875, "kernel": 0.1220703125}}, "feed_forward": {"intermediate_dense": {"bias": 0.00634765625, "kernel": 0.06640625}, "output_dense": {"bias": 0.015869140625, "kernel": 0.146484375}}, "final_layer_norm": {"bias": 0.01025390625, "scale": 0.0101318359375}, "layer_norm": {"bias": 0.022705078125, "scale": 0.015625}}, "25": {"attention": {"k_proj": {"bias": 7.212162017822266e-06, "kernel": 0.017333984375}, "out_proj": {"bias": 0.01708984375, "kernel": 0.150390625}, "q_proj": {"bias": 0.001617431640625, "kernel": 0.017822265625}, "v_proj": {"bias": 0.0184326171875, "kernel": 0.1474609375}}, "feed_forward": {"intermediate_dense": {"bias": 0.00830078125, "kernel": 0.0859375}, "output_dense": {"bias": 0.0177001953125, "kernel": 0.1640625}}, "final_layer_norm": {"bias": 0.015625, "scale": 0.041015625}, "layer_norm": {"bias": 0.026123046875, "scale": 0.0205078125}}, "26": {"attention": {"k_proj": {"bias": 4.9173831939697266e-06, "kernel": 0.016845703125}, "out_proj": 
{"bias": 0.018798828125, "kernel": 0.150390625}, "q_proj": {"bias": 0.00164794921875, "kernel": 0.018310546875}, "v_proj": {"bias": 0.0206298828125, "kernel": 0.1669921875}}, "feed_forward": {"intermediate_dense": {"bias": 0.0103759765625, "kernel": 0.1044921875}, "output_dense": {"bias": 0.019775390625, "kernel": 0.1787109375}}, "final_layer_norm": {"bias": 0.017578125, "scale": 0.0244140625}, "layer_norm": {"bias": 0.02880859375, "scale": 0.0145263671875}}, "27": {"attention": {"k_proj": {"bias": 1.0848045349121094e-05, "kernel": 0.0185546875}, "out_proj": {"bias": 0.0196533203125, "kernel": 0.17578125}, "q_proj": {"bias": 0.0018463134765625, "kernel": 0.0211181640625}, "v_proj": {"bias": 0.02099609375, "kernel": 0.1767578125}}, "feed_forward": {"intermediate_dense": {"bias": 0.0120849609375, "kernel": 0.1123046875}, "output_dense": {"bias": 0.02099609375, "kernel": 0.189453125}}, "final_layer_norm": {"bias": 0.0208740234375, "scale": 0.028076171875}, "layer_norm": {"bias": 0.029541015625, "scale": 0.01495361328125}}, "28": {"attention": {"k_proj": {"bias": 6.794929504394531e-06, "kernel": 0.0172119140625}, "out_proj": {"bias": 0.020751953125, "kernel": 0.185546875}, "q_proj": {"bias": 0.001861572265625, "kernel": 0.02001953125}, "v_proj": {"bias": 0.0230712890625, "kernel": 0.19140625}}, "feed_forward": {"intermediate_dense": {"bias": 0.01190185546875, "kernel": 0.11328125}, "output_dense": {"bias": 0.022216796875, "kernel": 0.205078125}}, "final_layer_norm": {"bias": 0.0224609375, "scale": 0.024169921875}, "layer_norm": {"bias": 0.0341796875, "scale": 0.024169921875}}, "29": {"attention": {"k_proj": {"bias": 5.7220458984375e-06, "kernel": 0.0189208984375}, "out_proj": {"bias": 0.0218505859375, "kernel": 0.1884765625}, "q_proj": {"bias": 0.00191497802734375, "kernel": 0.0208740234375}, "v_proj": {"bias": 0.025146484375, "kernel": 0.2109375}}, "feed_forward": {"intermediate_dense": {"bias": 0.01141357421875, "kernel": 0.111328125}, "output_dense": {"bias": 
0.0228271484375, "kernel": 0.212890625}}, "final_layer_norm": {"bias": 0.01806640625, "scale": 0.024658203125}, "layer_norm": {"bias": 0.037841796875, "scale": 0.019287109375}}, "3": {"attention": {"k_proj": {"bias": 3.647804260253906e-05, "kernel": 0.03759765625}, "out_proj": {"bias": 0.01361083984375, "kernel": 0.15625}, "q_proj": {"bias": 0.002716064453125, "kernel": 0.039794921875}, "v_proj": {"bias": 0.019287109375, "kernel": 0.1748046875}}, "feed_forward": {"intermediate_dense": {"bias": 0.01446533203125, "kernel": 0.232421875}, "output_dense": {"bias": 0.012451171875, "kernel": 0.173828125}}, "final_layer_norm": {"bias": 0.027099609375, "scale": 0.0322265625}, "layer_norm": {"bias": 0.0286865234375, "scale": 0.020263671875}}, "30": {"attention": {"k_proj": {"bias": 8.463859558105469e-06, "kernel": 0.0213623046875}, "out_proj": {"bias": 0.0238037109375, "kernel": 0.212890625}, "q_proj": {"bias": 0.002105712890625, "kernel": 0.0234375}, "v_proj": {"bias": 0.027099609375, "kernel": 0.244140625}}, "feed_forward": {"intermediate_dense": {"bias": 0.01226806640625, "kernel": 0.12255859375}, "output_dense": {"bias": 0.0250244140625, "kernel": 0.2294921875}}, "final_layer_norm": {"bias": 0.020751953125, "scale": 0.031005859375}, "layer_norm": {"bias": 0.037353515625, "scale": 0.020263671875}}, "31": {"attention": {"k_proj": {"bias": 1.4901161193847656e-05, "kernel": 0.04736328125}, "out_proj": {"bias": 0.0269775390625, "kernel": 0.29296875}, "q_proj": {"bias": 0.00433349609375, "kernel": 0.05224609375}, "v_proj": {"bias": 0.03271484375, "kernel": 0.314453125}}, "feed_forward": {"intermediate_dense": {"bias": 0.0137939453125, "kernel": 0.140625}, "output_dense": {"bias": 0.0284423828125, "kernel": 0.2578125}}, "final_layer_norm": {"bias": 0.02294921875, "scale": 0.02880859375}, "layer_norm": {"bias": 0.048583984375, "scale": 0.025634765625}}, "32": {"attention": {"k_proj": {"bias": 1.2040138244628906e-05, "kernel": 0.0299072265625}, "out_proj": {"bias": 0.0283203125, 
"kernel": 0.30078125}, "q_proj": {"bias": 0.00274658203125, "kernel": 0.032470703125}, "v_proj": {"bias": 0.03271484375, "kernel": 0.314453125}}, "feed_forward": {"intermediate_dense": {"bias": 0.01611328125, "kernel": 0.171875}, "output_dense": {"bias": 0.030517578125, "kernel": 0.2734375}}, "final_layer_norm": {"bias": 0.0263671875, "scale": 0.030029296875}, "layer_norm": {"bias": 0.044921875, "scale": 0.0439453125}}, "33": {"attention": {"k_proj": {"bias": 1.0013580322265625e-05, "kernel": 0.03466796875}, "out_proj": {"bias": 0.031494140625, "kernel": 0.361328125}, "q_proj": {"bias": 0.002899169921875, "kernel": 0.0380859375}, "v_proj": {"bias": 0.037109375, "kernel": 0.384765625}}, "feed_forward": {"intermediate_dense": {"bias": 0.021240234375, "kernel": 0.23046875}, "output_dense": {"bias": 0.03466796875, "kernel": 0.30859375}}, "final_layer_norm": {"bias": 0.035400390625, "scale": 0.058349609375}, "layer_norm": {"bias": 0.0498046875, "scale": 0.037109375}}, "34": {"attention": {"k_proj": {"bias": 1.1563301086425781e-05, "kernel": 0.043701171875}, "out_proj": {"bias": 0.0322265625, "kernel": 0.404296875}, "q_proj": {"bias": 0.00384521484375, "kernel": 0.048583984375}, "v_proj": {"bias": 0.039794921875, "kernel": 0.443359375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0263671875, "kernel": 0.322265625}, "output_dense": {"bias": 0.035888671875, "kernel": 0.33984375}}, "final_layer_norm": {"bias": 0.044189453125, "scale": 0.07373046875}, "layer_norm": {"bias": 0.05908203125, "scale": 0.04638671875}}, "35": {"attention": {"k_proj": {"bias": 4.38690185546875e-05, "kernel": 0.0615234375}, "out_proj": {"bias": 0.037353515625, "kernel": 0.640625}, "q_proj": {"bias": 0.00482177734375, "kernel": 0.0703125}, "v_proj": {"bias": 0.044921875, "kernel": 0.6015625}}, "feed_forward": {"intermediate_dense": {"bias": 0.031982421875, "kernel": 0.423828125}, "output_dense": {"bias": 0.0400390625, "kernel": 0.38671875}}, "final_layer_norm": {"bias": 0.052001953125, 
"scale": 0.052001953125}, "layer_norm": {"bias": 0.0693359375, "scale": 0.041259765625}}, "36": {"attention": {"k_proj": {"bias": 2.6941299438476562e-05, "kernel": 0.06640625}, "out_proj": {"bias": 0.04052734375, "kernel": 0.7421875}, "q_proj": {"bias": 0.005157470703125, "kernel": 0.08154296875}, "v_proj": {"bias": 0.0478515625, "kernel": 0.640625}}, "feed_forward": {"intermediate_dense": {"bias": 0.039306640625, "kernel": 0.5390625}, "output_dense": {"bias": 0.04345703125, "kernel": 0.443359375}}, "final_layer_norm": {"bias": 0.0634765625, "scale": 0.04931640625}, "layer_norm": {"bias": 0.0732421875, "scale": 0.05419921875}}, "37": {"attention": {"k_proj": {"bias": 2.6702880859375e-05, "kernel": 0.0859375}, "out_proj": {"bias": 0.04296875, "kernel": 0.8515625}, "q_proj": {"bias": 0.006744384765625, "kernel": 0.099609375}, "v_proj": {"bias": 0.06103515625, "kernel": 0.83984375}}, "feed_forward": {"intermediate_dense": {"bias": 0.05224609375, "kernel": 0.75}, "output_dense": {"bias": 0.045166015625, "kernel": 0.50390625}}, "final_layer_norm": {"bias": 0.087890625, "scale": 0.0625}, "layer_norm": {"bias": 0.09912109375, "scale": 0.091796875}}, "38": {"attention": {"k_proj": {"bias": 2.8967857360839844e-05, "kernel": 0.10546875}, "out_proj": {"bias": 0.0419921875, "kernel": 0.90625}, "q_proj": {"bias": 0.0076904296875, "kernel": 0.119140625}, "v_proj": {"bias": 0.06005859375, "kernel": 0.859375}}, "feed_forward": {"intermediate_dense": {"bias": 0.05859375, "kernel": 0.9296875}, "output_dense": {"bias": 0.04248046875, "kernel": 0.59375}}, "final_layer_norm": {"bias": 0.10205078125, "scale": 0.08056640625}, "layer_norm": {"bias": 0.1015625, "scale": 0.064453125}}, "39": {"attention": {"k_proj": {"bias": 2.9325485229492188e-05, "kernel": 0.140625}, "out_proj": {"bias": 0.0380859375, "kernel": 0.953125}, "q_proj": {"bias": 0.007568359375, "kernel": 0.138671875}, "v_proj": {"bias": 0.05224609375, "kernel": 0.85546875}}, "feed_forward": {"intermediate_dense": {"bias": 
0.05029296875, "kernel": 0.890625}, "output_dense": {"bias": 0.03759765625, "kernel": 0.828125}}, "final_layer_norm": {"bias": 0.0791015625, "scale": 0.0732421875}, "layer_norm": {"bias": 0.09375, "scale": 0.10546875}}, "4": {"attention": {"k_proj": {"bias": 6.103515625e-05, "kernel": 0.0517578125}, "out_proj": {"bias": 0.0128173828125, "kernel": 0.1689453125}, "q_proj": {"bias": 0.003570556640625, "kernel": 0.05419921875}, "v_proj": {"bias": 0.01806640625, "kernel": 0.1923828125}}, "feed_forward": {"intermediate_dense": {"bias": 0.0140380859375, "kernel": 0.2177734375}, "output_dense": {"bias": 0.01214599609375, "kernel": 0.16796875}}, "final_layer_norm": {"bias": 0.024169921875, "scale": 0.023193359375}, "layer_norm": {"bias": 0.0269775390625, "scale": 0.0198974609375}}, "40": {"attention": {"k_proj": {"bias": 2.372264862060547e-05, "kernel": 0.10791015625}, "out_proj": {"bias": 0.035400390625, "kernel": 1.1015625}, "q_proj": {"bias": 0.005859375, "kernel": 0.1162109375}, "v_proj": {"bias": 0.048828125, "kernel": 0.9375}}, "feed_forward": {"intermediate_dense": {"bias": 0.050537109375, "kernel": 0.9921875}, "output_dense": {"bias": 0.0361328125, "kernel": 0.6953125}}, "final_layer_norm": {"bias": 0.09033203125, "scale": 0.1103515625}, "layer_norm": {"bias": 0.08203125, "scale": 0.0693359375}}, "41": {"attention": {"k_proj": {"bias": 2.384185791015625e-05, "kernel": 0.11328125}, "out_proj": {"bias": 0.0306396484375, "kernel": 0.9375}, "q_proj": {"bias": 0.0068359375, "kernel": 0.1572265625}, "v_proj": {"bias": 0.046875, "kernel": 0.9609375}}, "feed_forward": {"intermediate_dense": {"bias": 0.044921875, "kernel": 0.99609375}, "output_dense": {"bias": 0.032470703125, "kernel": 0.81640625}}, "final_layer_norm": {"bias": 0.0830078125, "scale": 0.07421875}, "layer_norm": {"bias": 0.07470703125, "scale": 0.0927734375}}, "42": {"attention": {"k_proj": {"bias": 2.2411346435546875e-05, "kernel": 0.09521484375}, "out_proj": {"bias": 0.0308837890625, "kernel": 0.875}, 
"q_proj": {"bias": 0.0064697265625, "kernel": 0.14453125}, "v_proj": {"bias": 0.041015625, "kernel": 0.8515625}}, "feed_forward": {"intermediate_dense": {"bias": 0.042724609375, "kernel": 1.0078125}, "output_dense": {"bias": 0.032470703125, "kernel": 0.80078125}}, "final_layer_norm": {"bias": 0.076171875, "scale": 0.08544921875}, "layer_norm": {"bias": 0.06787109375, "scale": 0.09228515625}}, "43": {"attention": {"k_proj": {"bias": 2.1457672119140625e-05, "kernel": 0.060546875}, "out_proj": {"bias": 0.031494140625, "kernel": 0.765625}, "q_proj": {"bias": 0.004058837890625, "kernel": 0.08642578125}, "v_proj": {"bias": 0.04248046875, "kernel": 0.8046875}}, "feed_forward": {"intermediate_dense": {"bias": 0.04736328125, "kernel": 1.21875}, "output_dense": {"bias": 0.03515625, "kernel": 0.79296875}}, "final_layer_norm": {"bias": 0.0751953125, "scale": 0.08349609375}, "layer_norm": {"bias": 0.0830078125, "scale": 0.103515625}}, "44": {"attention": {"k_proj": {"bias": 1.0073184967041016e-05, "kernel": 0.083984375}, "out_proj": {"bias": 0.0341796875, "kernel": 0.7734375}, "q_proj": {"bias": 0.00628662109375, "kernel": 0.12890625}, "v_proj": {"bias": 0.0419921875, "kernel": 0.81640625}}, "feed_forward": {"intermediate_dense": {"bias": 0.04248046875, "kernel": 1.203125}, "output_dense": {"bias": 0.03759765625, "kernel": 0.86328125}}, "final_layer_norm": {"bias": 0.068359375, "scale": 0.06005859375}, "layer_norm": {"bias": 0.078125, "scale": 0.08203125}}, "45": {"attention": {"k_proj": {"bias": 1.1682510375976562e-05, "kernel": 0.150390625}, "out_proj": {"bias": 0.035400390625, "kernel": 0.84375}, "q_proj": {"bias": 0.01275634765625, "kernel": 0.2578125}, "v_proj": {"bias": 0.04296875, "kernel": 0.8125}}, "feed_forward": {"intermediate_dense": {"bias": 0.038330078125, "kernel": 1.109375}, "output_dense": {"bias": 0.03662109375, "kernel": 0.9296875}}, "final_layer_norm": {"bias": 0.0576171875, "scale": 0.06787109375}, "layer_norm": {"bias": 0.09765625, "scale": 0.0849609375}}, 
"46": {"attention": {"k_proj": {"bias": 1.3947486877441406e-05, "kernel": 0.3203125}, "out_proj": {"bias": 0.032958984375, "kernel": 0.80078125}, "q_proj": {"bias": 0.01422119140625, "kernel": 0.283203125}, "v_proj": {"bias": 0.04736328125, "kernel": 0.8671875}}, "feed_forward": {"intermediate_dense": {"bias": 0.0283203125, "kernel": 0.71484375}, "output_dense": {"bias": 0.0322265625, "kernel": 0.64453125}}, "final_layer_norm": {"bias": 0.044677734375, "scale": 0.05029296875}, "layer_norm": {"bias": 0.1279296875, "scale": 0.1484375}}, "47": {"attention": {"k_proj": {"bias": 8.225440979003906e-06, "kernel": 0.107421875}, "out_proj": {"bias": 0.03271484375, "kernel": 0.53515625}, "q_proj": {"bias": 0.007171630859375, "kernel": 0.1298828125}, "v_proj": {"bias": 0.052734375, "kernel": 0.90625}}, "feed_forward": {"intermediate_dense": {"bias": 0.0247802734375, "kernel": 0.5}, "output_dense": {"bias": 0.03076171875, "kernel": 0.4921875}}, "final_layer_norm": {"bias": 0.041015625, "scale": 0.04052734375}, "layer_norm": {"bias": 0.1416015625, "scale": 0.125}}, "5": {"attention": {"k_proj": {"bias": 2.574920654296875e-05, "kernel": 0.035400390625}, "out_proj": {"bias": 0.0133056640625, "kernel": 0.12060546875}, "q_proj": {"bias": 0.0024871826171875, "kernel": 0.03955078125}, "v_proj": {"bias": 0.019287109375, "kernel": 0.1552734375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0137939453125, "kernel": 0.203125}, "output_dense": {"bias": 0.0128173828125, "kernel": 0.16015625}}, "final_layer_norm": {"bias": 0.023681640625, "scale": 0.021484375}, "layer_norm": {"bias": 0.0286865234375, "scale": 0.0244140625}}, "6": {"attention": {"k_proj": {"bias": 3.123283386230469e-05, "kernel": 0.0390625}, "out_proj": {"bias": 0.01300048828125, "kernel": 0.1455078125}, "q_proj": {"bias": 0.002593994140625, "kernel": 0.042236328125}, "v_proj": {"bias": 0.0186767578125, "kernel": 0.1708984375}}, "feed_forward": {"intermediate_dense": {"bias": 0.01324462890625, "kernel": 0.1962890625}, 
"output_dense": {"bias": 0.01324462890625, "kernel": 0.154296875}}, "final_layer_norm": {"bias": 0.02294921875, "scale": 0.0220947265625}, "layer_norm": {"bias": 0.0272216796875, "scale": 0.01806640625}}, "7": {"attention": {"k_proj": {"bias": 8.869171142578125e-05, "kernel": 0.05859375}, "out_proj": {"bias": 0.01361083984375, "kernel": 0.162109375}, "q_proj": {"bias": 0.003631591796875, "kernel": 0.05712890625}, "v_proj": {"bias": 0.021484375, "kernel": 0.2119140625}}, "feed_forward": {"intermediate_dense": {"bias": 0.01348876953125, "kernel": 0.203125}, "output_dense": {"bias": 0.01275634765625, "kernel": 0.154296875}}, "final_layer_norm": {"bias": 0.026123046875, "scale": 0.0201416015625}, "layer_norm": {"bias": 0.035888671875, "scale": 0.02978515625}}, "8": {"attention": {"k_proj": {"bias": 5.793571472167969e-05, "kernel": 0.06298828125}, "out_proj": {"bias": 0.013671875, "kernel": 0.154296875}, "q_proj": {"bias": 0.00408935546875, "kernel": 0.06201171875}, "v_proj": {"bias": 0.0198974609375, "kernel": 0.19140625}}, "feed_forward": {"intermediate_dense": {"bias": 0.01556396484375, "kernel": 0.224609375}, "output_dense": {"bias": 0.01544189453125, "kernel": 0.1748046875}}, "final_layer_norm": {"bias": 0.029541015625, "scale": 0.02197265625}, "layer_norm": {"bias": 0.033203125, "scale": 0.0252685546875}}, "9": {"attention": {"k_proj": {"bias": 4.1484832763671875e-05, "kernel": 0.10205078125}, "out_proj": {"bias": 0.0135498046875, "kernel": 0.189453125}, "q_proj": {"bias": 0.00921630859375, "kernel": 0.1337890625}, "v_proj": {"bias": 0.0224609375, "kernel": 0.2255859375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0150146484375, "kernel": 0.216796875}, "output_dense": {"bias": 0.01177978515625, "kernel": 0.1533203125}}, "final_layer_norm": {"bias": 0.0283203125, "scale": 0.021484375}, "layer_norm": {"bias": 0.042724609375, "scale": 0.09716796875}}}, "pos_conv_embed": {"conv": {"bias": 0.02294921875, "weight_g": 0.017578125, "weight_v": 0.16796875}}}, 
"feature_extractor": {"conv_layers": {"0": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "1": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "2": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "3": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "4": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "5": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "6": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}}}, "feature_projection": {"layer_norm": {"bias": 0.052734375, "scale": 0.0634765625}, "projection": {"bias": 0.03271484375, "kernel": 0.53125}}, "masked_spec_embed": 0.0}}, "layer_param_norm/": {"lm_head": {"bias": 0.000513934064656496, "kernel": 4.458161354064941}, "wav2vec2": {"encoder": {"layer_norm": {"bias": 0.8087399005889893, "scale": 22.21056365966797}, "layers": {"0": {"attention": {"k_proj": {"bias": 0.004918951541185379, "kernel": 25.907794952392578}, "out_proj": {"bias": 1.5435106754302979, "kernel": 25.071956634521484}, "q_proj": {"bias": 1.300584077835083, "kernel": 26.182353973388672}, "v_proj": {"bias": 0.34525907039642334, "kernel": 25.80518341064453}}, "feed_forward": {"intermediate_dense": {"bias": 1.7575452327728271, "kernel": 95.10066986083984}, "output_dense": {"bias": 1.0201668739318848, "kernel": 90.88314819335938}}, "final_layer_norm": {"bias": 1.2844293117523193, "scale": 19.878904342651367}, "layer_norm": {"bias": 3.290097951889038, "scale": 16.03858184814453}}, "1": {"attention": {"k_proj": {"bias": 0.0072013214230537415, "kernel": 40.25491714477539}, "out_proj": {"bias": 1.29144287109375, "kernel": 41.6448860168457}, "q_proj": {"bias": 2.8571319580078125, "kernel": 40.09453201293945}, "v_proj": {"bias": 0.28102225065231323, "kernel": 40.13139343261719}}, "feed_forward": {"intermediate_dense": 
{"bias": 1.5771745443344116, "kernel": 93.18878173828125}, "output_dense": {"bias": 0.803621768951416, "kernel": 84.10867309570312}}, "final_layer_norm": {"bias": 1.1331145763397217, "scale": 18.403972625732422}, "layer_norm": {"bias": 1.7383671998977661, "scale": 19.385173797607422}}, "10": {"attention": {"k_proj": {"bias": 0.028601065278053284, "kernel": 47.28178405761719}, "out_proj": {"bias": 1.2180893421173096, "kernel": 50.115089416503906}, "q_proj": {"bias": 2.4192051887512207, "kernel": 47.243019104003906}, "v_proj": {"bias": 0.315180242061615, "kernel": 50.312808990478516}}, "feed_forward": {"intermediate_dense": {"bias": 1.6207945346832275, "kernel": 97.54641723632812}, "output_dense": {"bias": 0.5638551712036133, "kernel": 91.48897552490234}}, "final_layer_norm": {"bias": 2.1993794441223145, "scale": 20.35513687133789}, "layer_norm": {"bias": 1.6890054941177368, "scale": 22.307621002197266}}, "11": {"attention": {"k_proj": {"bias": 0.09233956038951874, "kernel": 47.06513214111328}, "out_proj": {"bias": 1.0699727535247803, "kernel": 49.30004119873047}, "q_proj": {"bias": 2.4686877727508545, "kernel": 46.789527893066406}, "v_proj": {"bias": 0.3560459613800049, "kernel": 49.84272003173828}}, "feed_forward": {"intermediate_dense": {"bias": 1.6689014434814453, "kernel": 98.29637908935547}, "output_dense": {"bias": 0.5470572710037231, "kernel": 93.20014953613281}}, "final_layer_norm": {"bias": 2.178788661956787, "scale": 20.362865447998047}, "layer_norm": {"bias": 1.673203945159912, "scale": 22.604415893554688}}, "12": {"attention": {"k_proj": {"bias": 0.03298710286617279, "kernel": 47.65814971923828}, "out_proj": {"bias": 1.0556013584136963, "kernel": 49.61164474487305}, "q_proj": {"bias": 2.355423927307129, "kernel": 47.41543960571289}, "v_proj": {"bias": 0.34211331605911255, "kernel": 50.02819061279297}}, "feed_forward": {"intermediate_dense": {"bias": 1.712772250175476, "kernel": 99.13731384277344}, "output_dense": {"bias": 0.5366638898849487, "kernel": 
94.74702453613281}}, "final_layer_norm": {"bias": 2.137502908706665, "scale": 20.325902938842773}, "layer_norm": {"bias": 1.7379934787750244, "scale": 23.156574249267578}}, "13": {"attention": {"k_proj": {"bias": 0.06237822026014328, "kernel": 49.54319763183594}, "out_proj": {"bias": 1.0485777854919434, "kernel": 49.24932098388672}, "q_proj": {"bias": 2.3326220512390137, "kernel": 49.39902114868164}, "v_proj": {"bias": 0.36998122930526733, "kernel": 49.42940139770508}}, "feed_forward": {"intermediate_dense": {"bias": 1.7665362358093262, "kernel": 99.74223327636719}, "output_dense": {"bias": 0.5536751747131348, "kernel": 95.18029022216797}}, "final_layer_norm": {"bias": 2.0175793170928955, "scale": 20.470375061035156}, "layer_norm": {"bias": 1.8363455533981323, "scale": 23.375064849853516}}, "14": {"attention": {"k_proj": {"bias": 0.14924587309360504, "kernel": 49.75286865234375}, "out_proj": {"bias": 1.2119272947311401, "kernel": 47.69355392456055}, "q_proj": {"bias": 2.3951995372772217, "kernel": 49.8133544921875}, "v_proj": {"bias": 0.3722843825817108, "kernel": 47.274627685546875}}, "feed_forward": {"intermediate_dense": {"bias": 1.8020305633544922, "kernel": 100.36180114746094}, "output_dense": {"bias": 0.5680183172225952, "kernel": 96.54590606689453}}, "final_layer_norm": {"bias": 2.1541244983673096, "scale": 20.61456298828125}, "layer_norm": {"bias": 1.9671568870544434, "scale": 23.55438804626465}}, "15": {"attention": {"k_proj": {"bias": 0.07449370622634888, "kernel": 49.87868881225586}, "out_proj": {"bias": 1.2609732151031494, "kernel": 48.29032897949219}, "q_proj": {"bias": 2.545073986053467, "kernel": 49.94525909423828}, "v_proj": {"bias": 0.40386444330215454, "kernel": 47.93647766113281}}, "feed_forward": {"intermediate_dense": {"bias": 1.8114612102508545, "kernel": 100.1701431274414}, "output_dense": {"bias": 0.7159097194671631, "kernel": 97.22415161132812}}, "final_layer_norm": {"bias": 2.0759165287017822, "scale": 20.706335067749023}, "layer_norm": 
{"bias": 2.215679168701172, "scale": 23.692293167114258}}, "16": {"attention": {"k_proj": {"bias": 0.028690317645668983, "kernel": 49.789791107177734}, "out_proj": {"bias": 1.1964430809020996, "kernel": 47.762840270996094}, "q_proj": {"bias": 2.625481128692627, "kernel": 49.67322540283203}, "v_proj": {"bias": 0.3599017262458801, "kernel": 47.443382263183594}}, "feed_forward": {"intermediate_dense": {"bias": 1.8105230331420898, "kernel": 100.82234191894531}, "output_dense": {"bias": 0.7385136485099792, "kernel": 98.08937072753906}}, "final_layer_norm": {"bias": 2.156309127807617, "scale": 21.189599990844727}, "layer_norm": {"bias": 2.149183750152588, "scale": 22.610759735107422}}, "17": {"attention": {"k_proj": {"bias": 0.015740085393190384, "kernel": 50.001319885253906}, "out_proj": {"bias": 1.139474630355835, "kernel": 47.080406188964844}, "q_proj": {"bias": 2.6962077617645264, "kernel": 50.104488372802734}, "v_proj": {"bias": 0.3982570171356201, "kernel": 46.750732421875}}, "feed_forward": {"intermediate_dense": {"bias": 1.8210697174072266, "kernel": 101.90509796142578}, "output_dense": {"bias": 0.7553049921989441, "kernel": 98.48403930664062}}, "final_layer_norm": {"bias": 2.240560531616211, "scale": 21.749385833740234}, "layer_norm": {"bias": 2.0655288696289062, "scale": 22.176105499267578}}, "18": {"attention": {"k_proj": {"bias": 0.0625796914100647, "kernel": 50.267364501953125}, "out_proj": {"bias": 1.2416081428527832, "kernel": 48.100120544433594}, "q_proj": {"bias": 2.5947299003601074, "kernel": 50.652549743652344}, "v_proj": {"bias": 0.4260401725769043, "kernel": 47.62586212158203}}, "feed_forward": {"intermediate_dense": {"bias": 1.8635938167572021, "kernel": 102.20716857910156}, "output_dense": {"bias": 0.8702353835105896, "kernel": 100.12542724609375}}, "final_layer_norm": {"bias": 2.342595100402832, "scale": 21.712392807006836}, "layer_norm": {"bias": 2.2413687705993652, "scale": 23.877422332763672}}, "19": {"attention": {"k_proj": {"bias": 
0.008574407547712326, "kernel": 49.54846954345703}, "out_proj": {"bias": 1.2168827056884766, "kernel": 47.98948287963867}, "q_proj": {"bias": 2.867854595184326, "kernel": 49.98030090332031}, "v_proj": {"bias": 0.38979336619377136, "kernel": 47.23333740234375}}, "feed_forward": {"intermediate_dense": {"bias": 1.9191184043884277, "kernel": 102.78443908691406}, "output_dense": {"bias": 0.9342584609985352, "kernel": 101.0276107788086}}, "final_layer_norm": {"bias": 2.3034279346466064, "scale": 22.071582794189453}, "layer_norm": {"bias": 2.1650390625, "scale": 23.092153549194336}}, "2": {"attention": {"k_proj": {"bias": 0.03653004392981529, "kernel": 46.15614318847656}, "out_proj": {"bias": 1.2130026817321777, "kernel": 43.853614807128906}, "q_proj": {"bias": 3.041682243347168, "kernel": 45.9248046875}, "v_proj": {"bias": 0.30890217423439026, "kernel": 43.85284423828125}}, "feed_forward": {"intermediate_dense": {"bias": 1.6167690753936768, "kernel": 98.25933074951172}, "output_dense": {"bias": 0.6920140981674194, "kernel": 87.242431640625}}, "final_layer_norm": {"bias": 1.4544155597686768, "scale": 20.98486328125}, "layer_norm": {"bias": 1.667765736579895, "scale": 22.059188842773438}}, "20": {"attention": {"k_proj": {"bias": 0.0037271142937242985, "kernel": 49.5183219909668}, "out_proj": {"bias": 1.2461135387420654, "kernel": 47.375213623046875}, "q_proj": {"bias": 2.7794594764709473, "kernel": 50.30613327026367}, "v_proj": {"bias": 0.3623378872871399, "kernel": 46.273372650146484}}, "feed_forward": {"intermediate_dense": {"bias": 1.9209908246994019, "kernel": 104.0489501953125}, "output_dense": {"bias": 1.0497362613677979, "kernel": 101.65393829345703}}, "final_layer_norm": {"bias": 2.333451509475708, "scale": 23.015880584716797}, "layer_norm": {"bias": 2.1426877975463867, "scale": 23.235729217529297}}, "21": {"attention": {"k_proj": {"bias": 0.03329595923423767, "kernel": 49.96206283569336}, "out_proj": {"bias": 1.2808051109313965, "kernel": 47.415626525878906}, 
"q_proj": {"bias": 2.7249975204467773, "kernel": 50.80659103393555}, "v_proj": {"bias": 0.4175662398338318, "kernel": 46.52143096923828}}, "feed_forward": {"intermediate_dense": {"bias": 1.9632256031036377, "kernel": 104.24440002441406}, "output_dense": {"bias": 1.1203250885009766, "kernel": 101.97819519042969}}, "final_layer_norm": {"bias": 2.3604397773742676, "scale": 22.662506103515625}, "layer_norm": {"bias": 2.2138638496398926, "scale": 23.515274047851562}}, "22": {"attention": {"k_proj": {"bias": 0.012683916836977005, "kernel": 50.35405349731445}, "out_proj": {"bias": 1.1991708278656006, "kernel": 46.8690185546875}, "q_proj": {"bias": 2.81036376953125, "kernel": 50.73627853393555}, "v_proj": {"bias": 0.37005093693733215, "kernel": 46.736412048339844}}, "feed_forward": {"intermediate_dense": {"bias": 1.8951349258422852, "kernel": 104.63954162597656}, "output_dense": {"bias": 1.1302428245544434, "kernel": 101.25523376464844}}, "final_layer_norm": {"bias": 2.2466814517974854, "scale": 22.182716369628906}, "layer_norm": {"bias": 2.2098731994628906, "scale": 22.525482177734375}}, "23": {"attention": {"k_proj": {"bias": 0.120549276471138, "kernel": 51.46174621582031}, "out_proj": {"bias": 1.3264572620391846, "kernel": 47.87089920043945}, "q_proj": {"bias": 2.6404333114624023, "kernel": 51.57463836669922}, "v_proj": {"bias": 0.5210777521133423, "kernel": 48.51097869873047}}, "feed_forward": {"intermediate_dense": {"bias": 1.8714258670806885, "kernel": 104.43405151367188}, "output_dense": {"bias": 1.108646273612976, "kernel": 102.05201721191406}}, "final_layer_norm": {"bias": 2.491565465927124, "scale": 22.138986587524414}, "layer_norm": {"bias": 2.697974920272827, "scale": 23.734621047973633}}, "24": {"attention": {"k_proj": {"bias": 0.05398482829332352, "kernel": 49.93202590942383}, "out_proj": {"bias": 1.3825275897979736, "kernel": 49.85151290893555}, "q_proj": {"bias": 2.803582191467285, "kernel": 49.924556732177734}, "v_proj": {"bias": 0.47551417350769043, 
"kernel": 49.927547454833984}}, "feed_forward": {"intermediate_dense": {"bias": 1.990923285484314, "kernel": 103.90543365478516}, "output_dense": {"bias": 1.1458380222320557, "kernel": 104.9254150390625}}, "final_layer_norm": {"bias": 2.5995006561279297, "scale": 22.194211959838867}, "layer_norm": {"bias": 2.419203042984009, "scale": 23.26953887939453}}, "25": {"attention": {"k_proj": {"bias": 0.04450356587767601, "kernel": 50.49494934082031}, "out_proj": {"bias": 1.1957685947418213, "kernel": 47.765289306640625}, "q_proj": {"bias": 2.878711223602295, "kernel": 50.281700134277344}, "v_proj": {"bias": 0.5583701133728027, "kernel": 48.30183410644531}}, "feed_forward": {"intermediate_dense": {"bias": 1.8914387226104736, "kernel": 104.17427062988281}, "output_dense": {"bias": 1.0262477397918701, "kernel": 104.85499572753906}}, "final_layer_norm": {"bias": 2.305722713470459, "scale": 22.72964096069336}, "layer_norm": {"bias": 2.583354949951172, "scale": 22.42755126953125}}, "26": {"attention": {"k_proj": {"bias": 0.07086975127458572, "kernel": 50.69786071777344}, "out_proj": {"bias": 1.1329269409179688, "kernel": 48.53632354736328}, "q_proj": {"bias": 2.8333656787872314, "kernel": 50.4635124206543}, "v_proj": {"bias": 0.49598926305770874, "kernel": 49.14326477050781}}, "feed_forward": {"intermediate_dense": {"bias": 1.983544111251831, "kernel": 103.60462951660156}, "output_dense": {"bias": 0.9868446588516235, "kernel": 102.02833557128906}}, "final_layer_norm": {"bias": 1.9362690448760986, "scale": 21.587879180908203}, "layer_norm": {"bias": 2.4858784675598145, "scale": 22.868398666381836}}, "27": {"attention": {"k_proj": {"bias": 0.3732529580593109, "kernel": 51.34492492675781}, "out_proj": {"bias": 1.3583134412765503, "kernel": 49.85731506347656}, "q_proj": {"bias": 2.618594169616699, "kernel": 51.20601272583008}, "v_proj": {"bias": 0.5683501958847046, "kernel": 50.311988830566406}}, "feed_forward": {"intermediate_dense": {"bias": 2.143080472946167, "kernel": 
101.88013458251953}, "output_dense": {"bias": 0.8687618970870972, "kernel": 101.70985412597656}}, "final_layer_norm": {"bias": 2.2165541648864746, "scale": 20.85378646850586}, "layer_norm": {"bias": 2.5526351928710938, "scale": 23.540470123291016}}, "28": {"attention": {"k_proj": {"bias": 0.4096335768699646, "kernel": 52.2803955078125}, "out_proj": {"bias": 1.386794090270996, "kernel": 50.622737884521484}, "q_proj": {"bias": 2.7664031982421875, "kernel": 51.92483901977539}, "v_proj": {"bias": 0.4615659713745117, "kernel": 50.95015335083008}}, "feed_forward": {"intermediate_dense": {"bias": 2.089618682861328, "kernel": 101.88746643066406}, "output_dense": {"bias": 0.7711120843887329, "kernel": 103.90321350097656}}, "final_layer_norm": {"bias": 2.126192092895508, "scale": 21.172107696533203}, "layer_norm": {"bias": 2.054711103439331, "scale": 24.408409118652344}}, "29": {"attention": {"k_proj": {"bias": 0.06762012839317322, "kernel": 48.740055084228516}, "out_proj": {"bias": 1.365987777709961, "kernel": 53.140220642089844}, "q_proj": {"bias": 2.7382171154022217, "kernel": 48.56138610839844}, "v_proj": {"bias": 0.41890132427215576, "kernel": 53.04469299316406}}, "feed_forward": {"intermediate_dense": {"bias": 2.090895175933838, "kernel": 102.560302734375}, "output_dense": {"bias": 0.872062623500824, "kernel": 108.16770935058594}}, "final_layer_norm": {"bias": 2.3700876235961914, "scale": 22.302989959716797}, "layer_norm": {"bias": 2.1496353149414062, "scale": 25.385906219482422}}, "3": {"attention": {"k_proj": {"bias": 0.1204313337802887, "kernel": 50.125701904296875}, "out_proj": {"bias": 1.3624417781829834, "kernel": 46.49477005004883}, "q_proj": {"bias": 2.7182188034057617, "kernel": 50.35295104980469}, "v_proj": {"bias": 0.3000553846359253, "kernel": 46.894187927246094}}, "feed_forward": {"intermediate_dense": {"bias": 1.6320147514343262, "kernel": 99.90219116210938}, "output_dense": {"bias": 0.6522164344787598, "kernel": 90.09829711914062}}, "final_layer_norm": 
{"bias": 1.7125478982925415, "scale": 21.080535888671875}, "layer_norm": {"bias": 1.8284051418304443, "scale": 23.59416961669922}}, "30": {"attention": {"k_proj": {"bias": 0.25504398345947266, "kernel": 50.66249084472656}, "out_proj": {"bias": 1.159855604171753, "kernel": 49.41553497314453}, "q_proj": {"bias": 2.799100637435913, "kernel": 50.74445343017578}, "v_proj": {"bias": 0.48367470502853394, "kernel": 49.75730895996094}}, "feed_forward": {"intermediate_dense": {"bias": 2.026671886444092, "kernel": 103.06732177734375}, "output_dense": {"bias": 0.8243669271469116, "kernel": 107.15858459472656}}, "final_layer_norm": {"bias": 2.1945126056671143, "scale": 23.442333221435547}, "layer_norm": {"bias": 2.301931381225586, "scale": 25.115337371826172}}, "31": {"attention": {"k_proj": {"bias": 0.35389411449432373, "kernel": 49.18547058105469}, "out_proj": {"bias": 1.0851024389266968, "kernel": 50.28276824951172}, "q_proj": {"bias": 2.581451892852783, "kernel": 49.28723907470703}, "v_proj": {"bias": 0.5289448499679565, "kernel": 50.41181182861328}}, "feed_forward": {"intermediate_dense": {"bias": 2.104063034057617, "kernel": 101.75663757324219}, "output_dense": {"bias": 1.002120852470398, "kernel": 104.55183410644531}}, "final_layer_norm": {"bias": 2.082719087600708, "scale": 23.341012954711914}, "layer_norm": {"bias": 2.2975897789001465, "scale": 24.888717651367188}}, "32": {"attention": {"k_proj": {"bias": 0.207120880484581, "kernel": 48.02813720703125}, "out_proj": {"bias": 1.093928337097168, "kernel": 49.468353271484375}, "q_proj": {"bias": 2.8447179794311523, "kernel": 48.01123809814453}, "v_proj": {"bias": 0.39654308557510376, "kernel": 49.75498962402344}}, "feed_forward": {"intermediate_dense": {"bias": 2.033989906311035, "kernel": 100.62407684326172}, "output_dense": {"bias": 1.0630019903182983, "kernel": 103.89726257324219}}, "final_layer_norm": {"bias": 2.044450283050537, "scale": 23.776098251342773}, "layer_norm": {"bias": 2.2476887702941895, "scale": 
25.156360626220703}}, "33": {"attention": {"k_proj": {"bias": 0.2086963802576065, "kernel": 47.967620849609375}, "out_proj": {"bias": 1.1307460069656372, "kernel": 49.31550598144531}, "q_proj": {"bias": 2.9888792037963867, "kernel": 47.95985412597656}, "v_proj": {"bias": 0.42853063344955444, "kernel": 49.58100509643555}}, "feed_forward": {"intermediate_dense": {"bias": 2.041210174560547, "kernel": 99.00303649902344}, "output_dense": {"bias": 1.0359094142913818, "kernel": 102.67219543457031}}, "final_layer_norm": {"bias": 1.9568297863006592, "scale": 23.543582916259766}, "layer_norm": {"bias": 2.442399501800537, "scale": 25.396568298339844}}, "34": {"attention": {"k_proj": {"bias": 0.2259853184223175, "kernel": 47.180938720703125}, "out_proj": {"bias": 1.3792003393173218, "kernel": 50.800682067871094}, "q_proj": {"bias": 2.8644118309020996, "kernel": 47.234779357910156}, "v_proj": {"bias": 0.39719992876052856, "kernel": 50.735965728759766}}, "feed_forward": {"intermediate_dense": {"bias": 2.121295690536499, "kernel": 97.8389892578125}, "output_dense": {"bias": 0.9670619368553162, "kernel": 101.99024963378906}}, "final_layer_norm": {"bias": 1.9000396728515625, "scale": 23.19698715209961}, "layer_norm": {"bias": 2.5240490436553955, "scale": 25.779953002929688}}, "35": {"attention": {"k_proj": {"bias": 0.35745763778686523, "kernel": 48.91041564941406}, "out_proj": {"bias": 1.2976853847503662, "kernel": 49.656803131103516}, "q_proj": {"bias": 2.615135908126831, "kernel": 49.244117736816406}, "v_proj": {"bias": 0.4806705713272095, "kernel": 49.48078155517578}}, "feed_forward": {"intermediate_dense": {"bias": 2.2015764713287354, "kernel": 96.44864654541016}, "output_dense": {"bias": 0.8609927296638489, "kernel": 100.73100280761719}}, "final_layer_norm": {"bias": 1.9790458679199219, "scale": 23.321218490600586}, "layer_norm": {"bias": 2.285153388977051, "scale": 26.278472900390625}}, "36": {"attention": {"k_proj": {"bias": 0.19027814269065857, "kernel": 46.2192497253418}, 
"out_proj": {"bias": 1.3382506370544434, "kernel": 50.997901916503906}, "q_proj": {"bias": 2.6993062496185303, "kernel": 46.20930480957031}, "v_proj": {"bias": 0.36446213722229004, "kernel": 51.18187713623047}}, "feed_forward": {"intermediate_dense": {"bias": 2.0755391120910645, "kernel": 95.54883575439453}, "output_dense": {"bias": 0.8958422541618347, "kernel": 100.42840576171875}}, "final_layer_norm": {"bias": 1.618175745010376, "scale": 23.848108291625977}, "layer_norm": {"bias": 2.0090999603271484, "scale": 25.78015899658203}}, "37": {"attention": {"k_proj": {"bias": 0.5269804000854492, "kernel": 45.260040283203125}, "out_proj": {"bias": 1.599480152130127, "kernel": 50.98283386230469}, "q_proj": {"bias": 2.3939435482025146, "kernel": 45.33077621459961}, "v_proj": {"bias": 0.36002129316329956, "kernel": 50.852684020996094}}, "feed_forward": {"intermediate_dense": {"bias": 1.971336841583252, "kernel": 94.80709075927734}, "output_dense": {"bias": 0.9046251773834229, "kernel": 100.2008285522461}}, "final_layer_norm": {"bias": 1.4468027353286743, "scale": 24.250158309936523}, "layer_norm": {"bias": 1.978513479232788, "scale": 25.821224212646484}}, "38": {"attention": {"k_proj": {"bias": 0.6130686402320862, "kernel": 43.44226837158203}, "out_proj": {"bias": 1.2996063232421875, "kernel": 50.466278076171875}, "q_proj": {"bias": 2.3290963172912598, "kernel": 43.45414733886719}, "v_proj": {"bias": 0.41826310753822327, "kernel": 50.33799743652344}}, "feed_forward": {"intermediate_dense": {"bias": 1.9165147542953491, "kernel": 92.85403442382812}, "output_dense": {"bias": 0.8927580118179321, "kernel": 98.45118713378906}}, "final_layer_norm": {"bias": 1.493051290512085, "scale": 24.96658706665039}, "layer_norm": {"bias": 2.1560826301574707, "scale": 26.53356170654297}}, "39": {"attention": {"k_proj": {"bias": 0.6430894136428833, "kernel": 43.21688461303711}, "out_proj": {"bias": 1.5947363376617432, "kernel": 50.339542388916016}, "q_proj": {"bias": 2.110431671142578, 
"kernel": 43.605262756347656}, "v_proj": {"bias": 0.38870078325271606, "kernel": 50.012779235839844}}, "feed_forward": {"intermediate_dense": {"bias": 1.9106833934783936, "kernel": 91.17467498779297}, "output_dense": {"bias": 0.972097635269165, "kernel": 98.83366394042969}}, "final_layer_norm": {"bias": 1.6390502452850342, "scale": 25.59851837158203}, "layer_norm": {"bias": 2.1347782611846924, "scale": 27.176971435546875}}, "4": {"attention": {"k_proj": {"bias": 0.13430944085121155, "kernel": 52.684295654296875}, "out_proj": {"bias": 1.544208288192749, "kernel": 47.894325256347656}, "q_proj": {"bias": 2.5189812183380127, "kernel": 52.865440368652344}, "v_proj": {"bias": 0.34665393829345703, "kernel": 48.25187683105469}}, "feed_forward": {"intermediate_dense": {"bias": 1.621163010597229, "kernel": 99.48369598388672}, "output_dense": {"bias": 0.8153223991394043, "kernel": 91.32151794433594}}, "final_layer_norm": {"bias": 1.7989122867584229, "scale": 20.61126708984375}, "layer_norm": {"bias": 1.922861099243164, "scale": 23.966323852539062}}, "40": {"attention": {"k_proj": {"bias": 0.5842467546463013, "kernel": 42.578712463378906}, "out_proj": {"bias": 1.5376503467559814, "kernel": 48.99298095703125}, "q_proj": {"bias": 2.046619176864624, "kernel": 43.344635009765625}, "v_proj": {"bias": 0.44114208221435547, "kernel": 48.57102584838867}}, "feed_forward": {"intermediate_dense": {"bias": 1.770835518836975, "kernel": 89.44320678710938}, "output_dense": {"bias": 1.0244522094726562, "kernel": 96.0932846069336}}, "final_layer_norm": {"bias": 1.798896312713623, "scale": 24.869396209716797}, "layer_norm": {"bias": 2.0778615474700928, "scale": 26.723310470581055}}, "41": {"attention": {"k_proj": {"bias": 1.6699845790863037, "kernel": 39.915489196777344}, "out_proj": {"bias": 1.2989803552627563, "kernel": 50.55731964111328}, "q_proj": {"bias": 1.7253488302230835, "kernel": 40.6795654296875}, "v_proj": {"bias": 0.3975880742073059, "kernel": 49.50791931152344}}, "feed_forward": 
{"intermediate_dense": {"bias": 1.9115654230117798, "kernel": 86.23811340332031}, "output_dense": {"bias": 1.0470619201660156, "kernel": 95.14247131347656}}, "final_layer_norm": {"bias": 2.297964096069336, "scale": 28.32220458984375}, "layer_norm": {"bias": 2.1078972816467285, "scale": 28.513172149658203}}, "42": {"attention": {"k_proj": {"bias": 0.7960059642791748, "kernel": 36.708290100097656}, "out_proj": {"bias": 1.3383876085281372, "kernel": 44.78962707519531}, "q_proj": {"bias": 1.5444276332855225, "kernel": 38.05863952636719}, "v_proj": {"bias": 0.5880073308944702, "kernel": 43.13645935058594}}, "feed_forward": {"intermediate_dense": {"bias": 1.6485475301742554, "kernel": 85.23059844970703}, "output_dense": {"bias": 1.0999541282653809, "kernel": 93.34835052490234}}, "final_layer_norm": {"bias": 2.021839141845703, "scale": 29.62232780456543}, "layer_norm": {"bias": 1.5734193325042725, "scale": 27.38504409790039}}, "43": {"attention": {"k_proj": {"bias": 1.2087428569793701, "kernel": 33.226219177246094}, "out_proj": {"bias": 1.3321952819824219, "kernel": 41.184059143066406}, "q_proj": {"bias": 1.3534941673278809, "kernel": 34.040531158447266}, "v_proj": {"bias": 0.5191360712051392, "kernel": 39.07907485961914}}, "feed_forward": {"intermediate_dense": {"bias": 1.6821610927581787, "kernel": 84.4572525024414}, "output_dense": {"bias": 0.8659600019454956, "kernel": 91.27842712402344}}, "final_layer_norm": {"bias": 1.9458153247833252, "scale": 31.839672088623047}, "layer_norm": {"bias": 1.6934361457824707, "scale": 25.536218643188477}}, "44": {"attention": {"k_proj": {"bias": 2.490312099456787, "kernel": 33.81727600097656}, "out_proj": {"bias": 1.0943963527679443, "kernel": 44.90924072265625}, "q_proj": {"bias": 1.2867296934127808, "kernel": 34.189945220947266}, "v_proj": {"bias": 0.3796514570713043, "kernel": 44.002098083496094}}, "feed_forward": {"intermediate_dense": {"bias": 1.761549472808838, "kernel": 83.41246795654297}, "output_dense": {"bias": 
0.8125085830688477, "kernel": 88.93128967285156}}, "final_layer_norm": {"bias": 1.9331786632537842, "scale": 34.012088775634766}, "layer_norm": {"bias": 1.586810827255249, "scale": 25.554336547851562}}, "45": {"attention": {"k_proj": {"bias": 2.048306465148926, "kernel": 33.657684326171875}, "out_proj": {"bias": 0.9800734519958496, "kernel": 48.50779724121094}, "q_proj": {"bias": 1.3633925914764404, "kernel": 33.844757080078125}, "v_proj": {"bias": 0.43038854002952576, "kernel": 48.665504455566406}}, "feed_forward": {"intermediate_dense": {"bias": 1.8786242008209229, "kernel": 80.08413696289062}, "output_dense": {"bias": 0.9476521015167236, "kernel": 84.32176208496094}}, "final_layer_norm": {"bias": 1.676342487335205, "scale": 32.72097396850586}, "layer_norm": {"bias": 1.5172195434570312, "scale": 24.071025848388672}}, "46": {"attention": {"k_proj": {"bias": 1.5383708477020264, "kernel": 34.8392333984375}, "out_proj": {"bias": 0.7450060844421387, "kernel": 50.938255310058594}, "q_proj": {"bias": 1.532208800315857, "kernel": 34.96090316772461}, "v_proj": {"bias": 0.371351957321167, "kernel": 51.6893196105957}}, "feed_forward": {"intermediate_dense": {"bias": 1.9411137104034424, "kernel": 74.41993713378906}, "output_dense": {"bias": 1.1016592979431152, "kernel": 74.62105560302734}}, "final_layer_norm": {"bias": 1.6790804862976074, "scale": 28.232065200805664}, "layer_norm": {"bias": 1.3348368406295776, "scale": 22.986331939697266}}, "47": {"attention": {"k_proj": {"bias": 0.25895360112190247, "kernel": 37.09356689453125}, "out_proj": {"bias": 0.6301657557487488, "kernel": 45.20797348022461}, "q_proj": {"bias": 1.650336742401123, "kernel": 37.740478515625}, "v_proj": {"bias": 0.34607622027397156, "kernel": 46.190757751464844}}, "feed_forward": {"intermediate_dense": {"bias": 1.9951748847961426, "kernel": 71.75222778320312}, "output_dense": {"bias": 0.6057071685791016, "kernel": 68.11976623535156}}, "final_layer_norm": {"bias": 1.520125150680542, "scale": 
23.069990158081055}, "layer_norm": {"bias": 1.0598437786102295, "scale": 20.234466552734375}}, "5": {"attention": {"k_proj": {"bias": 0.00840836763381958, "kernel": 48.02847671508789}, "out_proj": {"bias": 1.5279333591461182, "kernel": 49.12567138671875}, "q_proj": {"bias": 2.6153364181518555, "kernel": 48.17101287841797}, "v_proj": {"bias": 0.3096908926963806, "kernel": 49.92131805419922}}, "feed_forward": {"intermediate_dense": {"bias": 1.5453948974609375, "kernel": 99.59476470947266}, "output_dense": {"bias": 0.8454799652099609, "kernel": 90.61784362792969}}, "final_layer_norm": {"bias": 2.0749590396881104, "scale": 20.825382232666016}, "layer_norm": {"bias": 1.95145845413208, "scale": 23.389982223510742}}, "6": {"attention": {"k_proj": {"bias": 0.20033928751945496, "kernel": 49.63896179199219}, "out_proj": {"bias": 1.5183320045471191, "kernel": 48.44209289550781}, "q_proj": {"bias": 2.6636147499084473, "kernel": 50.118431091308594}, "v_proj": {"bias": 0.31253746151924133, "kernel": 48.97197723388672}}, "feed_forward": {"intermediate_dense": {"bias": 1.5235824584960938, "kernel": 98.68842315673828}, "output_dense": {"bias": 0.6965881586074829, "kernel": 90.20870971679688}}, "final_layer_norm": {"bias": 2.375554323196411, "scale": 20.302406311035156}, "layer_norm": {"bias": 1.9553532600402832, "scale": 23.748838424682617}}, "7": {"attention": {"k_proj": {"bias": 0.19373320043087006, "kernel": 49.441287994384766}, "out_proj": {"bias": 1.3339287042617798, "kernel": 48.69111633300781}, "q_proj": {"bias": 2.439426898956299, "kernel": 49.83055877685547}, "v_proj": {"bias": 0.39850425720214844, "kernel": 48.65386199951172}}, "feed_forward": {"intermediate_dense": {"bias": 1.529695987701416, "kernel": 98.43458557128906}, "output_dense": {"bias": 0.5387250185012817, "kernel": 89.9488525390625}}, "final_layer_norm": {"bias": 2.214036464691162, "scale": 20.541275024414062}, "layer_norm": {"bias": 1.8610401153564453, "scale": 22.47254753112793}}, "8": {"attention": 
{"k_proj": {"bias": 0.1718023270368576, "kernel": 48.945213317871094}, "out_proj": {"bias": 1.1568043231964111, "kernel": 49.24149703979492}, "q_proj": {"bias": 2.4160404205322266, "kernel": 48.7120361328125}, "v_proj": {"bias": 0.3257639408111572, "kernel": 49.424400329589844}}, "feed_forward": {"intermediate_dense": {"bias": 1.5825791358947754, "kernel": 98.04054260253906}, "output_dense": {"bias": 0.4940677881240845, "kernel": 89.3731689453125}}, "final_layer_norm": {"bias": 2.1672048568725586, "scale": 20.330373764038086}, "layer_norm": {"bias": 1.7923117876052856, "scale": 22.934362411499023}}, "9": {"attention": {"k_proj": {"bias": 0.20644523203372955, "kernel": 49.55120849609375}, "out_proj": {"bias": 1.357471227645874, "kernel": 50.029640197753906}, "q_proj": {"bias": 2.3735475540161133, "kernel": 49.710205078125}, "v_proj": {"bias": 0.3331097960472107, "kernel": 50.43877410888672}}, "feed_forward": {"intermediate_dense": {"bias": 1.662867546081543, "kernel": 96.64881896972656}, "output_dense": {"bias": 0.6358088254928589, "kernel": 89.91731262207031}}, "final_layer_norm": {"bias": 2.056180477142334, "scale": 19.610380172729492}, "layer_norm": {"bias": 1.8849740028381348, "scale": 24.290634155273438}}}, "pos_conv_embed": {"conv": {"bias": 5.547986030578613, "weight_g": 8.80840015411377, "weight_v": 84.6180648803711}}}, "feature_extractor": {"conv_layers": {"0": {"conv": {"bias": 2.0290679931640625, "kernel": 20.55536460876465}, "layer_norm": {"bias": 4.550922393798828, "scale": 16.167570114135742}}, "1": {"conv": {"bias": 1.7790228128433228, "kernel": 51.24136734008789}, "layer_norm": {"bias": 5.962646961212158, "scale": 23.268157958984375}}, "2": {"conv": {"bias": 1.140576720237732, "kernel": 46.50312042236328}, "layer_norm": {"bias": 4.176670551300049, "scale": 20.370853424072266}}, "3": {"conv": {"bias": 0.6725863218307495, "kernel": 44.397525787353516}, "layer_norm": {"bias": 3.888174533843994, "scale": 17.53795051574707}}, "4": {"conv": {"bias": 
0.6373162269592285, "kernel": 41.314056396484375}, "layer_norm": {"bias": 2.385471820831299, "scale": 16.34571647644043}}, "5": {"conv": {"bias": 0.5147221684455872, "kernel": 37.479759216308594}, "layer_norm": {"bias": 2.020900011062622, "scale": 17.064470291137695}}, "6": {"conv": {"bias": 0.4947893023490906, "kernel": 40.64780044555664}, "layer_norm": {"bias": 0.5876954793930054, "scale": 19.058603286743164}}}}, "feature_projection": {"layer_norm": {"bias": 6.33607292175293, "scale": 16.545515060424805}, "projection": {"bias": 1.6633964776992798, "kernel": 34.67955017089844}}, "masked_spec_embed": 11.914372444152832}}, "train/learning_rate": 2.4749970179982483e-06, "train/loss": 3.565929889678955, "train/param_norm": 1185.912109375, "_runtime": 3511, "_timestamp": 1659188616, "_step": 100, "_wandb": {"runtime": 3512}} \ No newline at end of file diff --git a/wandb/run-20220730_124505-101ubxa3/logs/debug-internal.log b/wandb/run-20220730_124505-101ubxa3/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..ecc8595371217f43b00a149894bc31bfb7564acd --- /dev/null +++ b/wandb/run-20220730_124505-101ubxa3/logs/debug-internal.log @@ -0,0 +1,1110 @@ +2022-07-30 12:45:06,617 INFO MainThread:3213310 [internal.py:wandb_internal():87] W&B internal server running at pid: 3213310, started at: 2022-07-30 12:45:06.617158 +2022-07-30 12:45:06,619 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: check_version +2022-07-30 12:45:06,619 INFO WriterThread:3213310 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/run-101ubxa3.wandb +2022-07-30 12:45:06,620 DEBUG SenderThread:3213310 [sender.py:send():234] send: header +2022-07-30 12:45:06,620 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: check_version +2022-07-30 12:45:06,657 DEBUG SenderThread:3213310 [sender.py:send():234] send: run +2022-07-30 12:45:06,845 INFO SenderThread:3213310 
[dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files +2022-07-30 12:45:06,845 INFO SenderThread:3213310 [sender.py:_start_run_threads():804] run started: 101ubxa3 with start time 1659185105 +2022-07-30 12:45:06,845 DEBUG SenderThread:3213310 [sender.py:send():234] send: summary +2022-07-30 12:45:06,845 INFO SenderThread:3213310 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 12:45:06,846 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: run_start +2022-07-30 12:45:07,852 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/wandb-summary.json +2022-07-30 12:45:09,201 DEBUG HandlerThread:3213310 [meta.py:__init__():40] meta init +2022-07-30 12:45:09,201 DEBUG HandlerThread:3213310 [meta.py:__init__():54] meta init done +2022-07-30 12:45:09,201 DEBUG HandlerThread:3213310 [meta.py:probe():214] probe +2022-07-30 12:45:09,203 DEBUG HandlerThread:3213310 [meta.py:_setup_git():204] setup git +2022-07-30 12:45:09,243 DEBUG HandlerThread:3213310 [meta.py:_setup_git():211] setup git done +2022-07-30 12:45:09,243 DEBUG HandlerThread:3213310 [meta.py:_save_code():92] save code +2022-07-30 12:45:09,256 DEBUG HandlerThread:3213310 [meta.py:_save_code():113] save code done +2022-07-30 12:45:09,256 DEBUG HandlerThread:3213310 [meta.py:_save_patches():130] save patches +2022-07-30 12:45:09,329 DEBUG HandlerThread:3213310 [meta.py:_save_patches():172] save patches done +2022-07-30 12:45:09,329 DEBUG HandlerThread:3213310 [meta.py:_save_pip():58] save pip +2022-07-30 12:45:09,330 DEBUG HandlerThread:3213310 [meta.py:_save_pip():72] save pip done +2022-07-30 12:45:09,330 DEBUG HandlerThread:3213310 [meta.py:probe():252] probe done +2022-07-30 12:45:09,333 DEBUG SenderThread:3213310 [sender.py:send():234] send: files +2022-07-30 12:45:09,334 INFO 
SenderThread:3213310 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-30 12:45:09,334 INFO SenderThread:3213310 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-30 12:45:09,335 INFO SenderThread:3213310 [sender.py:_save_file():939] saving file diff.patch with policy now +2022-07-30 12:45:09,340 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:45:09,341 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:45:09,793 INFO Thread-11 :3213310 [upload_job.py:push():137] Uploaded file /tmp/tmpxk6jhkl0wandb/2jcn1x2y-wandb-metadata.json +2022-07-30 12:45:09,798 INFO Thread-13 :3213310 [upload_job.py:push():137] Uploaded file /tmp/tmpxk6jhkl0wandb/35hrcegs-diff.patch +2022-07-30 12:45:09,852 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/wandb-metadata.json +2022-07-30 12:45:09,852 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/requirements.txt +2022-07-30 12:45:09,852 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:45:09,852 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/code/run_flax_speech_recognition_ctc.py +2022-07-30 12:45:09,852 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/diff.patch +2022-07-30 12:45:09,852 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_created():217] file/dir created: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/code +2022-07-30 12:45:10,017 INFO Thread-12 :3213310 [upload_job.py:push():137] Uploaded file /tmp/tmpxk6jhkl0wandb/1gn9s8a2-code/run_flax_speech_recognition_ctc.py +2022-07-30 12:45:11,853 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:45:13,853 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:45:15,854 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:45:17,855 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:45:23,858 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:45:24,476 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:45:24,476 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:45:25,859 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:45:37,287 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:45:39,613 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:45:39,613 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:45:39,866 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:45:41,867 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:45:47,870 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:45:49,871 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:45:51,872 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:45:54,838 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:45:54,838 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:46:04,877 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:46:06,878 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:46:07,361 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:46:09,970 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:46:09,970 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:46:25,129 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:46:25,130 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:46:37,434 DEBUG 
SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:46:40,325 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:46:40,325 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:46:44,894 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:46:46,895 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:46:48,896 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:46:50,896 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:46:52,897 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:46:54,898 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:46:55,470 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:46:55,470 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:46:56,899 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:46:59,900 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log 
+2022-07-30 12:47:01,902 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:03,902 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:05,903 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:07,507 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:47:07,904 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:09,905 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:10,659 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:47:10,659 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:47:11,906 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:13,907 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:15,908 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:17,909 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log 
+2022-07-30 12:47:19,910 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:21,911 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:23,911 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:25,837 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:47:25,837 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:47:25,912 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:27,913 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:29,914 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:31,915 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:33,917 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:35,922 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:37,575 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats 
+2022-07-30 12:47:37,923 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:39,924 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:40,983 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:47:40,983 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:47:41,925 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:43,926 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:45,927 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:47,928 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:49,929 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:51,930 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:53,932 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:55,934 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:56,129 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:47:56,129 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:47:57,935 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:47:59,936 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:01,937 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:03,938 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:05,939 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:07,656 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:48:07,942 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:09,943 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:11,275 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:48:11,275 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:48:11,944 INFO Thread-8 :3213310 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:13,945 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:15,946 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:17,947 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:19,948 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:21,949 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:23,950 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:25,951 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:26,411 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:48:26,411 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:48:27,953 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:29,954 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:31,959 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:33,958 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:35,959 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:37,746 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:48:37,960 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:39,961 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:41,594 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:48:41,595 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:48:41,962 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:43,963 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:45,966 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:47,967 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:49,968 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:51,969 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:53,970 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:55,971 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:56,741 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:48:56,741 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:48:57,972 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:48:59,973 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:01,975 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:03,975 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:05,977 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 
12:49:07,828 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:49:07,978 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:09,978 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:11,901 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:49:11,901 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:49:11,980 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:13,984 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:16,985 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:18,986 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:20,987 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:22,988 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:24,989 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 
12:49:26,990 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:27,047 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:49:27,047 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:49:28,993 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:30,994 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:32,995 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:34,996 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:36,997 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:37,907 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:49:38,998 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:40,999 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:42,190 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:49:42,190 DEBUG SenderThread:3213310 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 12:49:43,000 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:45,001 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:47,002 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:49,003 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:51,004 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:53,005 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:55,005 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:57,007 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:49:57,360 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:49:57,360 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:49:59,007 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:01,008 INFO Thread-8 :3213310 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:03,010 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:05,011 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:07,012 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:07,985 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:50:09,012 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:11,013 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:12,517 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:50:12,517 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:50:13,014 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:15,015 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:17,016 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:19,017 INFO Thread-8 :3213310 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:21,018 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:23,019 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:25,020 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:27,021 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:27,672 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:50:27,673 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:50:29,022 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:31,023 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:33,024 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:35,025 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:37,026 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:38,058 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:50:39,028 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:41,029 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:42,815 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:50:42,815 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:50:43,029 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:45,030 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:47,031 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:49,033 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:51,033 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:53,034 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:55,035 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:57,037 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:50:57,998 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:50:57,999 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:50:59,038 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:51:01,039 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:51:03,040 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:51:05,041 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:51:07,042 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:51:08,133 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:51:09,043 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:51:11,044 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:51:13,045 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:51:13,144 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:51:13,144 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:51:28,283 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:51:28,283 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:51:38,215 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:51:43,434 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:51:43,434 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:51:58,575 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:51:58,575 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:52:05,067 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:07,068 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:08,288 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:52:09,069 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:11,070 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:13,071 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:13,716 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:52:13,716 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:52:15,072 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:28,860 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:52:28,861 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:52:38,358 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:52:44,095 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:52:44,095 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:52:47,086 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:49,087 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:51,088 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:53,089 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:55,089 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:57,090 INFO 
Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:59,091 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:52:59,239 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:52:59,239 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:53:01,092 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:03,093 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:05,094 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:07,095 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:08,436 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:53:09,096 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:11,097 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:13,098 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:14,376 DEBUG 
HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:53:14,377 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:53:15,098 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:23,102 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:25,103 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:27,104 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:29,105 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:29,516 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:53:29,517 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:53:31,106 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:53:38,508 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:53:44,655 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:53:44,655 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:53:59,827 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:53:59,828 DEBUG 
SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:54:04,119 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:06,120 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:08,121 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:08,586 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:54:10,123 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:12,124 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:14,127 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:15,107 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:54:15,107 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:54:16,128 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:18,129 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:20,130 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:22,131 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:24,132 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:26,133 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:28,134 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:30,135 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:30,262 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:54:30,262 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:54:32,136 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:34,137 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:36,138 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:38,139 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 
12:54:38,659 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:54:40,140 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:42,141 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:44,142 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:45,396 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:54:45,396 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:54:46,143 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:48,144 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:50,145 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:52,147 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:54,147 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:54:56,148 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 
12:54:58,149 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:00,150 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:00,598 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:55:00,598 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:55:02,151 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:04,152 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:06,153 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:08,154 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:08,768 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:55:10,155 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:12,156 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:14,157 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 
12:55:15,738 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:55:15,738 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:55:16,158 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:18,159 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:20,160 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:22,161 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:24,163 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:55:30,876 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:55:30,876 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:55:38,848 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:55:46,017 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:55:46,018 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:56:01,241 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:56:01,241 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:56:04,179 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:56:08,926 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:56:09,181 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:56:15,184 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:56:16,404 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:56:16,405 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:56:19,186 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:56:25,188 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:56:29,190 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:56:31,191 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:56:31,542 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:56:31,542 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:56:35,193 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:56:37,194 INFO Thread-8 :3213310 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:56:38,998 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:56:40,195 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:56:46,948 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:56:46,949 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:56:48,199 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:57:02,212 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:57:02,213 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:57:09,068 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:57:17,413 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:57:17,413 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:57:21,212 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:57:30,215 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:57:32,216 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 12:57:32,586 DEBUG HandlerThread:3213310 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:57:32,586 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:57:39,135 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:57:47,750 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:57:47,750 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:58:02,898 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:58:02,899 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:58:09,202 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:58:18,035 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:58:18,035 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:58:33,178 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:58:33,179 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:58:39,274 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:58:48,322 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:58:48,323 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:59:03,460 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:59:03,461 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:59:09,352 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:59:18,648 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 
12:59:18,648 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:59:33,784 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:59:33,784 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 12:59:39,428 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 12:59:48,918 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 12:59:48,918 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:00:04,064 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:00:04,065 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:00:09,507 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:00:19,201 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:00:19,201 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:00:34,362 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:00:34,362 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:00:39,578 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:00:49,498 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:00:49,498 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:01:04,632 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:01:04,632 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:01:06,305 INFO Thread-8 :3213310 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:01:09,652 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:01:19,785 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:01:19,786 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:01:35,613 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:01:35,614 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:01:39,720 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:01:50,783 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:01:50,784 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:02:05,947 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:02:05,948 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:02:09,790 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:02:21,092 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:02:21,093 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:02:36,227 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:02:36,227 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:02:39,851 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:02:51,379 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:02:51,379 DEBUG SenderThread:3213310 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 13:03:06,513 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:03:06,513 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:03:09,911 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:03:21,647 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:03:21,648 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:03:36,784 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:03:36,785 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:03:39,971 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:03:51,917 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:03:51,918 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:04:07,054 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:04:07,054 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:04:10,032 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:04:22,188 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:04:22,189 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:04:37,331 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:04:37,331 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:04:40,103 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:04:52,469 
DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:04:52,470 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:05:07,622 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:05:07,622 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:05:10,260 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:05:22,759 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:05:22,759 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:05:37,898 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:05:37,898 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:05:40,392 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:05:53,030 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:05:53,030 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:05:59,441 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:06:08,183 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:06:08,183 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:06:10,469 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:06:23,365 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:06:23,366 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status 
+2022-07-30 13:06:38,541 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:06:38,541 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:06:40,541 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:06:53,708 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:06:53,708 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:07:08,856 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:07:08,857 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:07:10,613 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:07:23,993 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:07:23,994 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:07:39,131 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:07:39,132 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:07:40,685 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:07:54,269 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:07:54,269 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:08:09,404 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:08:09,404 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:08:10,758 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:08:24,535 DEBUG HandlerThread:3213310 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:08:24,535 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:08:39,691 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:08:39,692 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:08:40,832 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:08:54,829 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:08:54,830 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:09:09,962 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:09:09,962 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:09:10,906 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:09:25,099 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:09:25,100 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:09:40,232 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:09:40,232 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:09:40,985 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:09:55,370 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:09:55,370 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:10:10,509 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:10:10,510 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status 
+2022-07-30 13:10:11,110 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:10:25,644 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:10:25,645 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:10:38,564 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:10:40,914 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:10:40,914 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:10:41,186 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:10:56,126 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:10:56,126 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:11:11,252 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:11:12,387 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:11:12,388 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:11:27,560 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:11:27,560 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:11:41,317 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:11:42,716 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:11:42,717 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:11:57,856 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status 
+2022-07-30 13:11:57,856 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:12:11,384 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:12:12,991 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:12:12,991 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:12:28,125 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:12:28,126 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:12:41,447 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:12:43,268 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:12:43,268 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:12:58,401 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:12:58,401 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:13:11,511 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:13:13,539 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:13:13,540 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:13:28,676 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:13:28,676 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:13:41,574 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:13:43,810 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:13:43,810 DEBUG SenderThread:3213310 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 13:13:58,947 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:13:58,947 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:14:11,645 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:14:14,081 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:14:14,081 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:14:29,223 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:14:29,224 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:14:41,758 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:14:44,371 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:14:44,372 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:14:59,512 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:14:59,512 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:15:11,833 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:15:13,678 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:15:14,723 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:15:14,724 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:15:21,681 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:15:29,685 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:15:29,989 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:15:29,989 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:15:35,687 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:15:41,690 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:15:41,906 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:15:45,300 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:15:45,300 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:16:00,579 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:16:00,579 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:16:11,977 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:16:15,736 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:16:15,736 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:16:30,900 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:16:30,901 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:16:42,049 DEBUG SenderThread:3213310 
[sender.py:send():234] send: stats +2022-07-30 13:16:46,055 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:16:46,056 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:17:01,192 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:17:01,192 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:17:12,124 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:17:16,331 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:17:16,331 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:17:31,491 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:17:31,491 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:17:42,202 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:17:46,625 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:17:46,626 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:18:01,759 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:18:01,760 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:18:12,279 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:18:16,894 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:18:16,895 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:18:32,031 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 
13:18:32,032 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:18:42,353 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:18:47,165 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:18:47,165 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:19:02,299 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:19:02,300 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:19:12,434 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:19:17,435 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:19:17,435 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:19:32,568 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:19:32,569 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:19:42,522 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:19:47,713 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:19:47,713 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:20:02,846 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:20:02,846 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:20:07,800 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:20:12,601 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 
13:20:14,802 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:20:17,994 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:20:17,995 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:20:20,805 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:20:26,807 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:20:32,810 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:20:33,202 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:20:33,202 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:20:36,812 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:20:42,678 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:20:42,814 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:20:48,480 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:20:48,480 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:20:48,816 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:20:52,818 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:20:57,820 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:21:04,520 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:21:04,520 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:21:12,749 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:21:19,684 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:21:19,684 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:21:35,447 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:21:35,447 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:21:42,819 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:21:50,612 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:21:50,612 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:22:05,781 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:22:05,781 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:22:12,892 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:22:20,914 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:22:20,914 DEBUG 
SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:22:36,115 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:22:36,115 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:22:42,967 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:22:51,252 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:22:51,252 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:23:06,386 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:23:06,387 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:23:13,048 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:23:21,529 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:23:21,529 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:23:36,663 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:23:36,663 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:23:43,122 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:23:51,796 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:23:51,797 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:24:06,930 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:24:06,930 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:24:13,195 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats 
+2022-07-30 13:24:22,074 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:24:22,075 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:24:37,210 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:24:37,211 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:24:43,275 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:24:52,347 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:24:52,347 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:25:07,485 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:25:07,485 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:25:13,345 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:25:22,735 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:25:22,736 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:25:36,933 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:25:37,929 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:25:37,930 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:25:42,936 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:25:43,420 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 
13:25:46,937 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:25:50,939 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:25:53,203 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:25:53,203 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:25:55,941 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:25:59,943 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:26:03,944 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:26:07,946 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:26:08,456 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:26:08,457 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:26:11,948 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:26:13,488 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:26:15,950 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:26:17,951 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:26:21,953 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:26:23,680 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:26:23,680 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:26:25,955 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:26:29,957 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:26:38,861 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:26:38,861 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:26:43,562 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:26:54,036 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:26:54,037 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:27:09,197 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:27:09,197 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:27:13,633 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:27:24,357 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 13:27:24,357 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:27:39,503 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:27:39,504 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:27:43,705 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:27:54,636 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:27:54,636 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:28:09,771 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:28:09,771 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:28:13,775 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:28:24,903 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:28:24,903 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:28:40,037 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:28:40,037 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:28:43,847 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:28:55,172 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:28:55,172 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:29:10,312 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:29:10,312 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:29:13,913 DEBUG 
SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:29:25,452 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:29:25,452 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:29:40,583 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:29:40,583 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:29:43,983 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:29:55,726 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:29:55,727 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:30:10,863 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:30:10,863 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:30:14,064 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:30:25,994 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:30:25,994 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:30:41,136 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:30:41,136 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:30:44,160 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:30:56,075 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:30:56,364 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:30:56,364 DEBUG 
SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:31:00,076 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:02,077 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:04,078 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:08,080 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:10,081 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:11,604 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:31:11,604 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:31:12,082 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:14,083 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:14,232 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:31:16,084 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:18,084 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:20,085 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:22,086 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:24,087 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:26,088 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:31:26,782 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:31:26,782 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:31:41,945 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:31:41,945 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:31:44,299 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:31:57,117 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:31:57,118 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:32:12,306 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:32:12,307 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:32:14,371 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:32:27,461 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 13:32:27,461 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:32:42,600 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:32:42,600 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:32:44,438 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:32:57,733 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:32:57,733 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:33:12,866 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:33:12,867 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:33:14,502 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:33:28,002 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:33:28,002 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:33:43,160 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:33:43,160 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:33:44,567 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:33:58,360 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:33:58,360 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:34:13,502 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:34:13,502 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:34:14,630 DEBUG 
SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:34:28,643 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:34:28,644 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:34:43,773 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:34:43,774 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:34:44,700 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:34:58,909 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:34:58,909 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:35:14,047 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:35:14,048 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:35:14,771 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:35:29,206 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:35:29,207 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:35:44,363 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:35:44,364 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:35:44,843 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:35:46,204 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:35:48,205 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:35:50,206 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:35:59,530 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:35:59,531 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:36:04,211 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:36:14,672 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:36:14,672 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:36:14,917 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:36:18,216 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:36:28,220 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:36:29,880 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:36:29,881 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:36:36,223 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:36:44,992 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:36:45,187 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:36:45,187 
DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:36:45,226 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:36:53,229 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:36:59,232 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:37:00,576 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:37:00,577 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:37:05,234 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:37:13,237 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:37:15,058 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:37:16,030 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:37:16,030 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:37:19,240 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:37:26,242 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:37:30,244 INFO Thread-8 :3213310 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:37:31,288 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:37:31,288 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:37:36,246 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:37:42,248 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:37:45,121 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:37:46,250 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:37:46,486 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:37:46,486 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:37:52,252 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:37:56,254 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:01,802 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:38:01,802 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:38:03,257 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:07,258 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:13,260 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:15,183 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:38:17,054 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:38:17,054 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:38:17,262 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:21,264 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:25,265 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:31,268 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:32,270 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:38:32,271 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:38:35,269 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:39,271 INFO Thread-8 :3213310 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:43,272 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:45,251 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:38:45,273 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:47,491 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:38:47,492 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:38:49,275 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:54,277 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:38:58,278 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:02,280 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:02,659 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:39:02,660 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:39:08,282 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 
13:39:12,284 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:14,284 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:15,317 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:39:17,882 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:39:17,882 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:39:18,286 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:20,287 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:22,288 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:26,289 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:28,290 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:30,291 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:32,292 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 
13:39:33,072 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:39:33,072 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:39:34,293 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:36,294 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:38,295 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:40,295 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:42,296 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:44,297 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:45,389 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:39:46,298 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:39:48,375 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:39:48,376 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:39:59,303 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:40:03,766 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:40:03,766 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:40:07,306 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:40:15,309 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:40:15,462 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:40:18,999 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:40:18,999 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:40:23,312 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:40:31,315 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:40:34,399 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:40:34,399 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:40:38,318 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:40:44,320 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:40:45,535 DEBUG 
SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:40:49,639 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:40:49,639 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:40:52,324 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:40:58,326 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:41:04,329 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:41:04,858 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:41:04,858 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:41:08,330 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:41:14,332 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:41:15,607 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:41:20,174 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:41:20,175 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:41:21,335 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:41:27,337 INFO Thread-8 :3213310 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:41:31,339 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:41:35,474 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:41:35,474 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:41:37,341 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:41:41,343 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:41:45,680 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:41:47,345 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:41:50,752 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:41:50,753 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:41:51,347 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:41:55,348 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:41:59,350 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 
13:42:05,986 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:42:05,987 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:42:06,353 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:10,354 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:14,356 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:15,754 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:42:18,358 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:21,265 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:42:21,265 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:42:22,359 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:26,361 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:28,362 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:32,363 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:36,365 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:36,509 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:42:36,510 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:42:40,367 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:42,368 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:45,826 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:42:47,370 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:49,370 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:51,762 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:42:51,762 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:42:53,372 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:55,373 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:57,374 INFO Thread-8 :3213310 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:42:59,375 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:03,377 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:05,378 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:06,968 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:43:06,969 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:43:07,378 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:09,379 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:13,381 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:15,382 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:15,898 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:43:17,383 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:21,385 INFO Thread-8 :3213310 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:22,146 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:43:22,146 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:43:23,386 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:25,387 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:27,387 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:37,016 DEBUG SenderThread:3213310 [sender.py:send():234] send: history +2022-07-30 13:43:37,020 DEBUG SenderThread:3213310 [sender.py:send():234] send: summary +2022-07-30 13:43:37,025 INFO SenderThread:3213310 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 13:43:37,391 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/wandb-summary.json +2022-07-30 13:43:38,392 INFO Thread-8 :3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:39,441 DEBUG SenderThread:3213310 [sender.py:send():234] send: telemetry +2022-07-30 13:43:39,442 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:39,442 DEBUG SenderThread:3213310 [sender.py:send():234] send: exit +2022-07-30 13:43:39,442 INFO SenderThread:3213310 [sender.py:send_exit():366] handling exit code: 
1 +2022-07-30 13:43:39,443 INFO SenderThread:3213310 [sender.py:send_exit():368] handling runtime: 3512 +2022-07-30 13:43:39,446 INFO SenderThread:3213310 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 13:43:39,446 INFO SenderThread:3213310 [sender.py:send_exit():374] send defer +2022-07-30 13:43:39,446 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:39,447 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: defer +2022-07-30 13:43:39,447 INFO HandlerThread:3213310 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-30 13:43:39,447 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: defer +2022-07-30 13:43:39,447 INFO SenderThread:3213310 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-30 13:43:39,447 INFO SenderThread:3213310 [sender.py:transition_state():387] send defer: 1 +2022-07-30 13:43:39,447 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: defer +2022-07-30 13:43:39,447 INFO HandlerThread:3213310 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-30 13:43:39,454 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: defer +2022-07-30 13:43:39,454 INFO SenderThread:3213310 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-30 13:43:39,454 INFO SenderThread:3213310 [sender.py:transition_state():387] send defer: 2 +2022-07-30 13:43:39,454 DEBUG SenderThread:3213310 [sender.py:send():234] send: stats +2022-07-30 13:43:39,455 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: defer +2022-07-30 13:43:39,455 INFO HandlerThread:3213310 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-30 13:43:39,455 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: defer +2022-07-30 13:43:39,455 INFO SenderThread:3213310 [sender.py:send_request_defer():383] handle sender 
defer: 2 +2022-07-30 13:43:39,455 INFO SenderThread:3213310 [sender.py:transition_state():387] send defer: 3 +2022-07-30 13:43:39,455 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: defer +2022-07-30 13:43:39,455 INFO HandlerThread:3213310 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-30 13:43:39,457 DEBUG SenderThread:3213310 [sender.py:send():234] send: summary +2022-07-30 13:43:39,461 INFO SenderThread:3213310 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 13:43:39,461 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: defer +2022-07-30 13:43:39,461 INFO SenderThread:3213310 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-30 13:43:39,461 INFO SenderThread:3213310 [sender.py:transition_state():387] send defer: 4 +2022-07-30 13:43:39,461 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: defer +2022-07-30 13:43:39,461 INFO HandlerThread:3213310 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-30 13:43:39,462 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: defer +2022-07-30 13:43:39,462 INFO SenderThread:3213310 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-30 13:43:39,548 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:39,677 INFO SenderThread:3213310 [sender.py:transition_state():387] send defer: 5 +2022-07-30 13:43:39,677 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:39,677 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: defer +2022-07-30 13:43:39,678 INFO HandlerThread:3213310 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-30 13:43:39,678 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: defer +2022-07-30 13:43:39,678 INFO SenderThread:3213310 
[sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-30 13:43:39,678 INFO SenderThread:3213310 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-30 13:43:39,779 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:40,393 INFO SenderThread:3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:40,393 INFO SenderThread:3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/config.yaml +2022-07-30 13:43:40,393 INFO SenderThread:3213310 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/wandb-summary.json +2022-07-30 13:43:40,394 INFO SenderThread:3213310 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files +2022-07-30 13:43:40,394 INFO SenderThread:3213310 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/config.yaml config.yaml +2022-07-30 13:43:40,394 INFO SenderThread:3213310 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/diff.patch diff.patch +2022-07-30 13:43:40,394 INFO SenderThread:3213310 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/requirements.txt requirements.txt +2022-07-30 13:43:40,397 INFO SenderThread:3213310 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log output.log +2022-07-30 13:43:40,397 INFO SenderThread:3213310 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/wandb-summary.json wandb-summary.json +2022-07-30 13:43:40,398 INFO 
SenderThread:3213310 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/wandb-metadata.json wandb-metadata.json +2022-07-30 13:43:40,400 INFO SenderThread:3213310 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-30 13:43:40,401 INFO SenderThread:3213310 [sender.py:transition_state():387] send defer: 6 +2022-07-30 13:43:40,401 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:40,407 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: defer +2022-07-30 13:43:40,407 INFO HandlerThread:3213310 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-30 13:43:40,407 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: defer +2022-07-30 13:43:40,408 INFO SenderThread:3213310 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-30 13:43:40,408 INFO SenderThread:3213310 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 13:43:40,506 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:40,506 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:40,608 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:40,608 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:40,710 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:40,710 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:40,812 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:40,812 DEBUG SenderThread:3213310 
[sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:40,877 INFO Thread-17 :3213310 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/wandb-summary.json +2022-07-30 13:43:40,883 INFO Thread-14 :3213310 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/config.yaml +2022-07-30 13:43:40,898 INFO Thread-15 :3213310 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/requirements.txt +2022-07-30 13:43:40,914 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:40,914 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:41,016 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:41,016 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:41,118 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:41,118 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:41,139 INFO Thread-16 :3213310 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/files/output.log +2022-07-30 13:43:41,220 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:41,220 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:41,322 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:41,322 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:41,339 INFO Thread-7 :3213310 [sender.py:transition_state():387] send defer: 7 +2022-07-30 13:43:41,340 DEBUG HandlerThread:3213310 
[handler.py:handle_request():130] handle_request: defer +2022-07-30 13:43:41,340 INFO HandlerThread:3213310 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-30 13:43:41,340 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: defer +2022-07-30 13:43:41,340 INFO SenderThread:3213310 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-30 13:43:41,424 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:42,442 INFO SenderThread:3213310 [sender.py:transition_state():387] send defer: 8 +2022-07-30 13:43:42,442 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:42,443 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: defer +2022-07-30 13:43:42,443 INFO HandlerThread:3213310 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-30 13:43:42,443 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: defer +2022-07-30 13:43:42,443 INFO SenderThread:3213310 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-30 13:43:42,443 INFO SenderThread:3213310 [sender.py:transition_state():387] send defer: 9 +2022-07-30 13:43:42,444 DEBUG SenderThread:3213310 [sender.py:send():234] send: final +2022-07-30 13:43:42,444 DEBUG SenderThread:3213310 [sender.py:send():234] send: footer +2022-07-30 13:43:42,444 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: defer +2022-07-30 13:43:42,444 INFO HandlerThread:3213310 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-30 13:43:42,444 DEBUG SenderThread:3213310 [sender.py:send_request():248] send_request: defer +2022-07-30 13:43:42,444 INFO SenderThread:3213310 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-30 13:43:42,544 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 13:43:42,544 DEBUG SenderThread:3213310 
[sender.py:send_request():248] send_request: poll_exit +2022-07-30 13:43:42,544 INFO SenderThread:3213310 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 13:43:42,809 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: get_summary +2022-07-30 13:43:42,814 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-30 13:43:42,815 DEBUG HandlerThread:3213310 [handler.py:handle_request():130] handle_request: shutdown +2022-07-30 13:43:42,815 INFO HandlerThread:3213310 [handler.py:finish():731] shutting down handler +2022-07-30 13:43:43,445 INFO WriterThread:3213310 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/run-101ubxa3.wandb +2022-07-30 13:43:43,808 INFO SenderThread:3213310 [sender.py:finish():1070] shutting down sender +2022-07-30 13:43:43,808 INFO SenderThread:3213310 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 13:43:43,808 INFO SenderThread:3213310 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 13:43:43,856 INFO MainThread:3213310 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220730_124505-101ubxa3/logs/debug.log b/wandb/run-20220730_124505-101ubxa3/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..3889692f0a3d75f04ba41391256d56f540066bac --- /dev/null +++ b/wandb/run-20220730_124505-101ubxa3/logs/debug.log @@ -0,0 +1,159 @@ +2022-07-30 12:45:05,741 INFO MainThread:3212038 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-30 12:45:05,741 INFO MainThread:3212038 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-30 12:45:05,741 INFO MainThread:3212038 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/logs/debug.log +2022-07-30 12:45:05,741 INFO MainThread:3212038 [wandb_init.py:_log_setup():372] 
Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_124505-101ubxa3/logs/debug-internal.log +2022-07-30 12:45:05,742 INFO MainThread:3212038 [wandb_init.py:init():404] calling init triggers +2022-07-30 12:45:05,742 INFO MainThread:3212038 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-30 12:45:05,742 INFO MainThread:3212038 [wandb_init.py:init():460] starting backend +2022-07-30 12:45:05,742 INFO MainThread:3212038 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-30 12:45:05,785 INFO MainThread:3212038 [backend.py:ensure_launched():216] starting backend process... +2022-07-30 12:45:05,826 INFO MainThread:3212038 [backend.py:ensure_launched():221] started backend process with pid: 3213310 +2022-07-30 12:45:05,828 INFO MainThread:3212038 [wandb_init.py:init():469] backend started and connected +2022-07-30 12:45:05,841 INFO MainThread:3212038 [wandb_init.py:init():533] updated telemetry +2022-07-30 12:45:05,942 INFO MainThread:3212038 [wandb_init.py:init():563] communicating current version +2022-07-30 12:45:06,656 INFO MainThread:3212038 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-30 12:45:06,656 INFO MainThread:3212038 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-30 12:45:06,845 INFO MainThread:3212038 [wandb_init.py:init():606] starting run threads in backend +2022-07-30 12:45:09,337 INFO MainThread:3212038 [wandb_run.py:_console_start():1810] atexit reg +2022-07-30 12:45:09,338 INFO MainThread:3212038 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-30 12:45:09,338 INFO MainThread:3212038 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-30 12:45:09,340 INFO MainThread:3212038 [wandb_run.py:_redirect():1745] Redirects installed. 
+2022-07-30 12:45:09,340 INFO MainThread:3212038 [wandb_init.py:init():633] run started, returning control to user process +2022-07-30 13:43:36,388 INFO MainThread:3212038 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-30 13:43:37,009 INFO MainThread:3212038 [wandb_run.py:_restore():1752] restore +2022-07-30 13:43:39,447 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 74351 +} + +2022-07-30 13:43:39,678 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 74351 +} + +2022-07-30 13:43:40,405 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 417315 +} + +2022-07-30 13:43:40,507 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 417315 +} + +2022-07-30 13:43:40,609 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 417315 + total_bytes: 417315 +} + +2022-07-30 13:43:40,711 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 417315 + total_bytes: 417315 +} + +2022-07-30 13:43:40,813 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 417315 + total_bytes: 417315 +} + +2022-07-30 13:43:40,915 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + 
other_count: 1 +} +pusher_stats { + uploaded_bytes: 417315 + total_bytes: 417315 +} + +2022-07-30 13:43:41,017 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 417315 + total_bytes: 417315 +} + +2022-07-30 13:43:41,119 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 417315 + total_bytes: 417315 +} + +2022-07-30 13:43:41,221 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 417315 + total_bytes: 417315 +} + +2022-07-30 13:43:41,323 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 417315 + total_bytes: 417315 +} + +2022-07-30 13:43:42,443 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 417315 + total_bytes: 417315 +} + +2022-07-30 13:43:42,808 INFO MainThread:3212038 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 417315 + total_bytes: 417315 +} +local_info { +} + +2022-07-30 13:43:44,539 INFO MainThread:3212038 [wandb_run.py:_append_history():2130] rendering history +2022-07-30 13:43:44,540 INFO MainThread:3212038 [wandb_run.py:_append_summary():2085] rendering summary +2022-07-30 13:43:44,540 INFO MainThread:3212038 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220730_124505-101ubxa3/run-101ubxa3.wandb b/wandb/run-20220730_124505-101ubxa3/run-101ubxa3.wandb new file mode 100644 index 
0000000000000000000000000000000000000000..9e9bbb994f94d21243c2bc8b59bbd49f15caa492 --- /dev/null +++ b/wandb/run-20220730_124505-101ubxa3/run-101ubxa3.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1eb5e98d359963f8ef635f536561c6dd8813a3353f09776016ed802720a46201 +size 577536 diff --git a/wandb/run-20220730_135604-y1b5rbiq/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220730_135604-y1b5rbiq/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..d846a57f2375127bc169f9735151ad564083efac --- /dev/null +++ b/wandb/run-20220730_135604-y1b5rbiq/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1605 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+ +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
@flax.struct.dataclass
class FlaxDataCollatorSpeechSeq2SeqWithPadding:
    """
    Data collator that dynamically pads audio inputs and tokenised labels.

    Args:
        processor ([`Wav2Vec2Processor`]):
            The processor used for processing the data.
        input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`"longest"`):
            Padding strategy for the returned input sequences (``True``/``'longest'``,
            ``'max_length'``, or ``False``/``'do_not_pad'``).
        label_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`"max_length"`):
            Padding strategy for the returned label sequences (same options as above).
        pad_input_to_multiple_of (:obj:`int`, `optional`):
            If set, pad the input sequence length to a multiple of this value
            (useful for Tensor Cores on NVIDIA hardware with compute capability >= 7.5).
        pad_to_multiple_of_label (:obj:`int`, `optional`):
            If set, pad the label sequence length to a multiple of this value.
        max_input_length (:obj:`float`, `optional`):
            Maximum length of the returned ``input_values``.
        max_label_length (:obj:`float`, `optional`):
            Maximum length of the returned label ids.
    """

    processor: Any
    input_padding: Union[bool, str] = "longest"
    label_padding: Union[bool, str] = "max_length"
    pad_input_to_multiple_of: Optional[int] = None
    pad_to_multiple_of_label: Optional[int] = None
    max_input_length: Optional[float] = None
    max_label_length: Optional[float] = None

    def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]:
        # Inputs and labels have different lengths and need different padding
        # methods, so they are split apart and padded separately.
        audio_features = [{"input_values": feature["input_values"]} for feature in features]
        text_features = [{"input_ids": feature["labels"]} for feature in features]

        batch = self.processor.feature_extractor.pad(
            audio_features,
            max_length=self.max_input_length,
            padding=self.input_padding,
            pad_to_multiple_of=self.pad_input_to_multiple_of,
            return_tensors="np",
        )

        padded_labels = self.processor.tokenizer.pad(
            text_features,
            max_length=self.max_label_length,
            padding=self.label_padding,
            pad_to_multiple_of=self.pad_to_multiple_of_label,
            return_tensors="np",
        )

        # Replace padding positions with -100 so the loss ignores them.
        label_ids = padded_labels["input_ids"]
        masked = np.ma.array(label_ids, mask=np.not_equal(padded_labels.attention_mask, 1))
        batch["labels"] = masked.filled(fill_value=-100)

        return batch
def get_grouped_indices(
    dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None
) -> np.array:
    """
    Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file
    (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486)

    Returns a list of indices in which each slice of `batch_size` consecutive indices
    corresponds to elements of similar lengths. The indices are:

        - randomly permuted (if a JAX rng is specified)
        - grouped in mega-batches of size `mega_batch_mult * batch_size`
        - sorted by length (descending) within each mega-batch

    The result is the concatenation of all mega-batches, with the mega-batch containing
    the overall longest element moved to the front so an OOM surfaces sooner rather than later.
    """
    lengths = dataset["input_length"]

    if mega_batch_mult is None:
        # Default: 50, or enough to form ~4 mega-batches, whichever is smaller.
        mega_batch_mult = min(len(lengths) // (batch_size * 4), 50)
        if mega_batch_mult == 0:
            # Guard for tiny datasets.
            mega_batch_mult = 1

    num_samples = len(lengths)
    if rng is not None:
        # Use JAX for the permutation so the PRNG key seeded outside the sampler controls it.
        indices = jax.random.permutation(rng, np.arange(num_samples))
    else:
        indices = np.arange(num_samples)

    megabatch_size = mega_batch_mult * batch_size
    chunks = [indices[start : start + megabatch_size].tolist() for start in range(0, num_samples, megabatch_size)]
    chunks = [sorted(chunk, key=lambda idx: lengths[idx], reverse=True) for chunk in chunks]

    # Each chunk is sorted descending, so its first element is its longest.
    chunk_maxima = [lengths[chunk[0]] for chunk in chunks]
    longest = np.argmax(chunk_maxima).item()
    # Swap the whole mega-batch holding the longest element to the front
    # (unlike the PT grouped sampler, which only moves the single longest element).
    chunks[0], chunks[longest] = chunks[longest], chunks[0]

    return np.array([idx for chunk in chunks for idx in chunk])


def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray:
    """Slice `samples_idx` into batches of `batch_size`.

    If the dataset size is not divisible by the batch size and `drop_last` is `True`,
    the trailing incomplete batch is dropped; otherwise it is kept as a shorter final batch.
    """
    num_samples = len(samples_idx)
    if drop_last:
        remainder = num_samples % batch_size
        if remainder != 0:
            samples_idx = samples_idx[:-remainder]
        return samples_idx.reshape((num_samples // batch_size, batch_size))
    return np.array_split(samples_idx, math.ceil(num_samples / batch_size))


def write_train_metric(summary_writer, train_metrics, train_time, step):
    """Log accumulated training metrics (and wall-clock train time) to TensorBoard."""
    summary_writer.scalar("train_time", train_time, step)

    stacked = get_metrics(train_metrics)
    for name, values in stacked.items():
        # Back-fill one scalar per accumulated step so the x-axis stays aligned.
        for offset, value in enumerate(values):
            summary_writer.scalar(f"train_{name}", value, step - len(values) + offset + 1)


def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None):
    """Log evaluation metrics (and optionally predicted strings) to TensorBoard."""
    for name, value in eval_metrics.items():
        summary_writer.scalar(f"eval_{name}", value, step)

    if pred_str is not None:
        # Also dump the actual predictions for debugging.
        summary_writer.text("eval_predictions", "\n".join(pred_str), step)


def write_wandb_log(metrics, step, prefix=None):
    """Log a metrics dict to Weights & Biases from the primary host only."""
    if jax.process_index() != 0:
        return
    payload = {}
    for key, value in metrics.items():
        if "layer" in key:
            # Trailing slash groups per-layer metrics into their own W&B section.
            payload[f"{key}/"] = value
        elif prefix is not None:
            payload[f"{prefix}/{key}"] = value
        else:
            payload[key] = value
    wandb.log(payload, step)


def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"):
    """Log the first `num_log` (label, prediction) pairs as a W&B table."""
    if jax.process_index() != 0:
        return
    # Convert the string data to a wandb-compatible row format.
    rows = [[label_str[i], pred_str[i]] for i in range(len(pred_str))]
    wandb.log(
        {
            f"{prefix}/step_{int(step / 1000)}k": wandb.Table(
                columns=["label_str", "pred_str"], data=rows[:num_log]
            )
        },
        step,
    )


def create_learning_rate_fn(
    num_train_steps: int, num_warmup_steps: int, learning_rate: float
) -> Callable[[int], jnp.array]:
    """Linear warmup from 0 to `learning_rate` over `num_warmup_steps`, then linear decay to 0."""
    warmup = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps)
    decay = optax.linear_schedule(
        init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps
    )
    return optax.join_schedules(schedules=[warmup, decay], boundaries=[num_warmup_steps])
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
def make_dataset(data_args, seed=42):
    """Build the combined NST + NPSC dataset dict (train/validation/test splits)."""
    # Pre-processing dataset
    import re

    def map_nst(entry):
        # Lower-case and normalise accented characters to the Norwegian alphabet.
        text = entry["text"].lower()
        text = text.replace("(...vær stille under dette opptaket...)", "")
        text = re.sub('[áàâ]', 'a', text)
        text = re.sub('[ä]', 'æ', text)
        text = re.sub('[éèëê]', 'e', text)
        text = re.sub('[íìïî]', 'i', text)
        text = re.sub('[óòöô]', 'o', text)
        # NOTE(review): 'ö' is already consumed by the [óòöô] rule above, so this
        # substitution can never fire — confirm which mapping was intended.
        text = re.sub('[ö]', 'ø', text)
        text = re.sub('[ç]', 'c', text)
        text = re.sub('[úùüû]', 'u', text)
        # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text)
        text = re.sub('\s+', ' ', text)
        return {"text": text}

    def filter_nst(entry):
        if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)):
            return False  # Too short
        # NOTE(review): argument order looks swapped for re.match(pattern, string);
        # presumably re.match("pIW|CA", entry["type"]) was intended — confirm.
        if re.match(entry["type"], "pIW|CA"):
            return False  # Spelling out words
        return True

    def filter_npsc(entry):
        if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)):
            return False  # Too short
        # Reject utterances containing digits.
        if re.search("\d", entry["text"]):
            return False
        return True

    def map_npsc(entry):
        batch = {"text": entry["text"].lower()}
        batch["text"] = re.sub('[áàâ]', 'a', batch["text"])
        batch["text"] = re.sub('[ä]', 'æ', batch["text"])
        batch["text"] = re.sub('[éèëê]', 'e', batch["text"])
        batch["text"] = re.sub('[íìïî]', 'i', batch["text"])
        batch["text"] = re.sub('[óòöô]', 'o', batch["text"])
        batch["text"] = re.sub('[ö]', 'ø', batch["text"])
        batch["text"] = re.sub('[ç]', 'c', batch["text"])
        batch["text"] = re.sub('[úùüû]', 'u', batch["text"])
        batch["text"] = re.sub('\s', ' ', batch["text"])
        # NOTE(review): the four empty patterns below look like angle-bracket
        # hesitation tokens (e.g. "<ee>") that were stripped from the file; as
        # written, an empty pattern inserts the replacement between every
        # character — confirm the original token literals.
        batch["text"] = re.sub('', 'eee', batch["text"])
        batch["text"] = re.sub('', 'qqq', batch["text"])
        batch["text"] = re.sub('', 'mmm', batch["text"])
        batch["text"] = re.sub('', 'xxx', batch["text"])
        # batch["text"] = re.sub('', '?', batch["text"])
        if "<" in batch["text"]:
            raise ValueError(batch["text"])
        return batch

    nst = datasets.load_dataset("NbAiLab/NST", "no-close")
    npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3")
    # TODO NST_hesitate

    # Use the same train/val ratio as NPSC to split NST's train set.
    split = len(npsc["train"]) / (len(npsc["train"]) + len(npsc["validation"]))
    nst_train = nst["train"].train_test_split(train_size=split, seed=seed)
    nst["train"] = nst_train["train"]
    nst["validation"] = nst_train["test"]

    nst = nst.filter(filter_nst).map(
        map_nst,
        num_proc=data_args.preprocessing_num_workers,
        desc="filtering NST",
    ).shuffle(seed=seed)
    npsc = npsc.filter(filter_npsc).map(
        map_npsc,
        num_proc=data_args.preprocessing_num_workers,
        desc="filtering NPSC",
    ).shuffle(seed=seed)

    # Keep only the columns shared by both corpora.
    npsc_base = npsc.remove_columns([col for col in npsc["train"].column_names if col not in ["text", "audio"]])
    nst_base = nst.remove_columns([col for col in nst["train"].column_names if col not in ["text", "audio"]])

    combined = {}
    for split in "train", "validation", "test":
        # Interleave the corpora, weighting by their number of examples.
        probs = np.array([len(nst_base[split]), len(npsc_base[split])])
        probs = (probs / probs.sum()).tolist()
        comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed)
        combined[split] = comb

    return datasets.DatasetDict(**combined)


def main():
    # 1. Parse input arguments
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.
    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments))

    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    # 2. Setup logging
    # Make one log on every process with the configuration for debugging.
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )
    # Set the verbosity to info of the Transformers logger.
    # We only want one process per machine to log things on the screen.
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets["train"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets["eval"] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise 
ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets["train"] = raw_datasets["train"].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), 
"sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets["train"]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets["eval"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets["train"], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets["train"][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix="train") + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in test_split: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = 
to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220730_135604-y1b5rbiq/files/config.yaml b/wandb/run-20220730_135604-y1b5rbiq/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d087d516b40328c4f00700dc5c86a4eb1aa22196 --- /dev/null +++ b/wandb/run-20220730_135604-y1b5rbiq/files/config.yaml @@ -0,0 +1,33 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659189364 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 2: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220730_135604-y1b5rbiq/files/diff.patch b/wandb/run-20220730_135604-y1b5rbiq/files/diff.patch new file mode 100644 index 0000000000000000000000000000000000000000..39b46e1762f7e4b4f330f4ff487b597cc5496642 --- /dev/null +++ b/wandb/run-20220730_135604-y1b5rbiq/files/diff.patch @@ -0,0 +1,10 @@ +diff --git a/.gitattributes b/.gitattributes +index 755356a..f0eef4b 100644 +--- a/.gitattributes ++++ b/.gitattributes +@@ -29,3 +29,4 @@ 
saved_model/**/* filter=lfs diff=lfs merge=lfs -text + *.zip filter=lfs diff=lfs merge=lfs -text + *.zstandard filter=lfs diff=lfs merge=lfs -text + *tfevents* filter=lfs diff=lfs merge=lfs -text ++*.wandb filter=lfs diff=lfs merge=lfs -text +\ No newline at end of file diff --git a/wandb/run-20220730_135604-y1b5rbiq/files/output.log b/wandb/run-20220730_135604-y1b5rbiq/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..7b40cae250aeb9a966336aa92b84cadcc597bbaa --- /dev/null +++ b/wandb/run-20220730_135604-y1b5rbiq/files/output.log @@ -0,0 +1,2165 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul30_13-55-59_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, 
+logging_steps=100, +logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=default, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=12, +per_device_train_batch_size=12, +precision=full_mixed, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: default +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 48.48it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 
301.86it/s] +WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + 
"contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at 
/home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('project_hid', 'kernel'), ('quantizer', 'weight_proj', 'kernel'), ('project_q', 'kernel'), ('project_hid', 'bias'), ('quantizer', 'weight_proj', 'bias'), ('quantizer', 'codevectors'), ('project_q', 'bias')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
+Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'bias'), ('lm_head', 'kernel')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 9016.64ex/s] +removing punctuation from train split #3: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8987.54ex/s] +removing punctuation from train split #2: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8774.46ex/s] +removing punctuation from train split #4: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8781.68ex/s] +removing punctuation from train split #1: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7529.19ex/s] +removing punctuation 
from train split #5: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8682.85ex/s] +removing punctuation from train split #6: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8696.66ex/s] +removing punctuation from train split #7: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8419.58ex/s] +removing punctuation from train split #8: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 7780.02ex/s] +removing punctuation from train split #10: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8423.60ex/s] +removing punctuation from train split #9: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8314.01ex/s] +removing punctuation from train split #11: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9523/9523 [00:01<00:00, 8360.27ex/s] +removing punctuation from train 
split #12: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8388.07ex/s] +removing punctuation from train split #13: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8076.60ex/s] +removing punctuation from train split #14: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8385.87ex/s] +removing punctuation from train split #15: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8247.25ex/s] +removing punctuation from train split #16: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9522/9522 [00:01<00:00, 8282.62ex/s] +removing punctuation from train split #9: 90%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8561/9523 [00:01<00:00, 8038.51ex/s] +removing punctuation from train split #10: 91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 8631/9523 [00:01<00:00, 8237.86ex/s] +removing punctuation from train split #9: 
99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9410/9523 [00:01<00:00, 8168.88ex/s] +removing punctuation from train split #12: 81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 7719/9522 [00:00<00:00, 8021.73ex/s] +removing punctuation from train split #12: 90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 8581/9522 [00:01<00:00, 8196.22ex/s] +removing punctuation from train split #11: 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 9412/9523 [00:01<00:00, 8255.67ex/s] +removing punctuation from train split #13: 77%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 7342/9522 [00:00<00:00, 7892.60ex/s] +removing punctuation from train split #12: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9434/9522 [00:01<00:00, 8293.39ex/s] +removing punctuation from train split #13: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 8193/9522 [00:01<00:00, 8072.17ex/s] +removing punctuation from train split #13: 
95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9047/9522 [00:01<00:00, 8208.50ex/s] +removing punctuation from train split #14: 90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 8565/9522 [00:01<00:00, 8252.60ex/s] +removing punctuation from train split #14: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 9434/9522 [00:01<00:00, 8381.45ex/s] +removing punctuation from train split #16: 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8460/9522 [00:01<00:00, 8176.76ex/s] +removing punctuation from train split #17: 80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 7594/9522 [00:00<00:00, 8040.57ex/s] +removing punctuation from train split #18: 71%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 6741/9522 [00:00<00:00, 8561.35ex/s] +removing punctuation from train split #16: 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 9301/9522 [00:01<00:00, 8245.09ex/s] +removing punctuation from train split #20: 62%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 5874/9522 [00:00<00:00, 
8506.59ex/s] +removing punctuation from train split #17: 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 9322/9522 [00:01<00:00, 8341.78ex/s] +removing punctuation from train split #18: 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 8468/9522 [00:01<00:00, 8226.73ex/s] +removing punctuation from train split #19: 79%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 7480/9522 [00:00<00:00, 8193.61ex/s] +removing punctuation from train split #22: 52%|██████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 4987/9522 [00:00<00:00, 8517.27ex/s] +removing punctuation from train split #21: 70%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 6707/9522 [00:00<00:00, 8578.07ex/s] +removing punctuation from train split #22: 61%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 5839/9522 [00:00<00:00, 8351.90ex/s] +removing punctuation from train split #23: 47%|████████████████████████████████████████████████████████████████████████████████████████████▉ | 4513/9522 [00:00<00:00, 7286.27ex/s] +removing punctuation from train split #24: 53%|███████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 5016/9522 [00:00<00:00, 8458.83ex/s] +removing punctuation from train split #27: 26%|██████████████████████████████████████████████████▉ | 2476/9522 [00:00<00:00, 8333.80ex/s] +removing punctuation from train split #26: 
42%|█████████████████████████████████████████████████████████████████████████████████▌ | 3960/9522 [00:00<00:00, 8242.89ex/s] +removing punctuation from train split #27: 35%|████████████████████████████████████████████████████████████████████▋ | 3339/9522 [00:00<00:00, 8449.30ex/s] +removing punctuation from train split #28: 25%|████████████████████████████████████████████████▌ | 2361/9522 [00:00<00:00, 8018.54ex/s] +removing punctuation from train split #29: 26%|██████████████████████████████████████████████████▍ | 2449/9522 [00:00<00:00, 8231.06ex/s] +removing punctuation from train split #25: 90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8523/9522 [00:01<00:00, 7439.03ex/s] +removing punctuation from train split #26: 88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 8361/9522 [00:01<00:00, 7965.30ex/s] +removing punctuation from train split #27: 74%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 7019/9522 [00:00<00:00, 9069.67ex/s] +removing punctuation from train split #25: 99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 9443/9522 [00:01<00:00, 7922.19ex/s] +removing punctuation from train split #27: 84%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 7979/9522 [00:00<00:00, 9233.51ex/s] +removing punctuation from train split #26: 
97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 9277/9522 [00:01<00:00, 8308.04ex/s] +removing punctuation from train split #27: 94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 8948/9522 [00:01<00:00, 9372.69ex/s] +removing punctuation from train split #28: 80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 7663/9522 [00:00<00:00, 8311.71ex/s] +removing punctuation from train split #30: 73%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 6963/9522 [00:00<00:00, 8726.86ex/s] +removing punctuation from train split #28: 90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 8597/9522 [00:01<00:00, 8617.07ex/s] +removing punctuation from train split #29: 92%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 8736/9522 [00:01<00:00, 8466.25ex/s] +removing punctuation from train split #30: 93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 8813/9522 [00:01<00:00, 8988.36ex/s] +removing punctuation from train split #31: 
84%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 8046/9522 [00:00<00:00, 8800.51ex/s] +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00020_of_00032.arrow8945/9522 [00:01<00:00, 8834.57ex/s] +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00021_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00022_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00023_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00024_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00025_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00026_of_00032.arrow 
+WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00027_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00028_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9045c40777a7f3cb_00031_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00000_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00001_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00002_of_00032.arrow 
+WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00003_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00004_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00005_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00006_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00007_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00008_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00009_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00010_of_00032.arrow 
+WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00011_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00012_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00013_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00014_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00015_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00016_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00017_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00018_of_00032.arrow 
+WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00019_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00020_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00021_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00022_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00023_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00024_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00025_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00026_of_00032.arrow 
+WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00027_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00028_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-296256ac19389512_00031_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00000_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00001_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00002_of_00032.arrow 
+WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00003_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00004_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00005_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00006_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00007_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00008_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00009_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00010_of_00032.arrow 
+WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00011_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00012_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00013_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00014_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00015_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00016_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00017_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00018_of_00032.arrow 
+WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00019_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00020_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00021_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00022_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00023_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00024_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00025_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00026_of_00032.arrow 
+WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00027_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00028_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00029_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00030_of_00032.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30518bb67e20f98b_00031_of_00032.arrow +preprocess dataset #0: 0%| | 0/9497 [00:00 to the vocabulary +Adding to the vocabulary +2022-07-30 14:07:45.463224: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2022-07-30 14:07:45.463276: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303) +/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +WARNING:huggingface_hub.repository:/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. 
Make sure you pull the latest changes with `repo.git_pull()`. +run_flax_speech_recognition_ctc.py:337: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) +INFO:__main__:***** Running training ***** +INFO:__main__: Num examples = 302693 +INFO:__main__: Num Epochs = 40 +INFO:__main__: Instantaneous batch size per device = 12 +INFO:__main__: Num gradient accumulation steps = 1 +INFO:__main__: Total train batch size (w. parallel & distributed) = 96 +INFO:__main__: Total optimization steps = 126120 +INFO:__main__: Gradient checkpointing: True +INFO:__main__: Use scan: False +INFO:__main__: Fuse matmuls: False +Epoch ... (1/40): 0%| | 0/40 [00:00 + main() + File "run_flax_speech_recognition_ctc.py", line 1541, in main + run_evaluation(cur_step) + File "run_flax_speech_recognition_ctc.py", line 1442, in run_evaluation + eval_samples_idx = get_grouped_indices(vectorized_datasets["eval"], eval_batch_size) + File "/data/flax/lib/python3.8/site-packages/datasets/dataset_dict.py", line 50, in __getitem__ + return super().__getitem__(k) +KeyError: 'eval' \ No newline at end of file diff --git a/wandb/run-20220730_135604-y1b5rbiq/files/requirements.txt b/wandb/run-20220730_135604-y1b5rbiq/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ef78cbdea431c3d66bc5af51443394cb7955eab --- /dev/null +++ b/wandb/run-20220730_135604-y1b5rbiq/files/requirements.txt @@ -0,0 +1,158 @@ +absl-py==1.0.0 +aiohttp==3.8.1 +aiosignal==1.2.0 +appdirs==1.4.4 +astunparse==1.6.3 +async-timeout==4.0.2 +attrs==21.4.0 +audioread==2.1.9 +backcall==0.2.0 +cachetools==4.2.4 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.10 +chex==0.1.3 +click==8.0.3 +cloud-tpu-client==0.10 +cloud-tpu-profiler==2.4.0 +clu==0.0.6 +colorama==0.4.5 +commonmark==0.9.1 +configparser==5.2.0 +contextlib2==21.6.0 
+cycler==0.11.0 +datasets==2.4.0 +decorator==5.1.0 +dill==0.3.4 +dm-tree==0.1.6 +docker-pycreds==0.4.0 +etils==0.6.0 +exceptiongroup==1.0.0rc8 +filelock==3.4.2 +flatbuffers==2.0 +flax==0.5.3 +fonttools==4.28.5 +frozenlist==1.2.0 +fsspec==2021.11.1 +future==0.18.2 +gast==0.4.0 +gitdb==4.0.9 +gitpython==3.1.26 +google-api-core==1.31.5 +google-api-python-client==1.8.0 +google-auth-httplib2==0.1.0 +google-auth-oauthlib==0.4.6 +google-auth==2.3.3 +google-pasta==0.2.0 +googleapis-common-protos==1.54.0 +grpcio==1.43.0 +h5py==3.6.0 +httplib2==0.20.2 +huggingface-hub==0.2.1 +hypothesis==6.53.0 +idna==3.3 +importlib-metadata==4.10.0 +importlib-resources==5.4.0 +ipython==7.31.0 +jax==0.3.15 +jaxlib==0.3.15 +jedi==0.18.1 +jiwer==2.3.0 +joblib==1.1.0 +keras-preprocessing==1.1.2 +keras==2.7.0 +kiwisolver==1.3.2 +libclang==12.0.0 +librosa==0.9.2 +libtpu-nightly==0.1.dev20220722 +llvmlite==0.39.0 +markdown==3.3.6 +matplotlib-inline==0.1.3 +matplotlib==3.5.1 +ml-collections==0.1.0 +msgpack==1.0.3 +multidict==5.2.0 +multiprocess==0.70.12.2 +numba==0.56.0 +numpy==1.22.0 +oauth2client==4.1.3 +oauthlib==3.1.1 +opt-einsum==3.3.0 +optax==0.1.3 +packaging==21.3 +pandas==1.3.5 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +pillow==9.0.0 +pip==22.2.1 +pkg-resources==0.0.0 +pooch==1.6.0 +promise==2.3 +prompt-toolkit==3.0.24 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pyctcdecode==0.4.0 +pygments==2.11.1 +pygtrie==2.5.0 +pyparsing==3.0.6 +python-dateutil==2.8.2 +python-levenshtein==0.12.2 +pytz==2021.3 +pyyaml==6.0 +regex==2021.11.10 +requests-oauthlib==1.3.0 +requests==2.27.0 +resampy==0.3.1 +responses==0.18.0 +rich==11.2.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==1.1.1 +scipy==1.7.3 +sentry-sdk==1.5.2 +setuptools==44.0.0 +shortuuid==1.0.8 +six==1.16.0 +smmap==5.0.0 +sortedcontainers==2.4.0 +soundfile==0.10.3.post1 +sox==1.4.1 +subprocess32==3.5.4 +tensorboard-data-server==0.6.1 
+tensorboard-plugin-wit==1.8.0 +tensorboard==2.7.0 +tensorflow-cpu==2.7.0 +tensorflow-datasets==4.4.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.23.1 +tensorflow-metadata==1.5.0 +tensorflow==2.7.0 +tensorstore==0.1.21 +termcolor==1.1.0 +threadpoolctl==3.1.0 +tokenizers==0.11.2 +toolz==0.11.2 +torch==1.12.0 +torchaudio==0.12.0+cpu +tqdm==4.62.3 +traitlets==5.1.1 +transformers==4.21.0 +typing-extensions==4.3.0 +uritemplate==3.0.1 +urllib3==1.26.7 +wandb==0.12.9 +wcwidth==0.2.5 +werkzeug==2.0.2 +wheel==0.37.1 +wrapt==1.13.3 +xxhash==2.0.2 +yarl==1.7.2 +yaspin==2.1.0 +zipp==3.7.0 \ No newline at end of file diff --git a/wandb/run-20220730_135604-y1b5rbiq/files/wandb-metadata.json b/wandb/run-20220730_135604-y1b5rbiq/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..77413b8e42c77ac3c30f847303c3c3fc8c1f2e9f --- /dev/null +++ b/wandb/run-20220730_135604-y1b5rbiq/files/wandb-metadata.json @@ -0,0 +1,67 @@ +{ + "os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29", + "python": "3.8.10", + "heartbeatAt": "2022-07-30T13:56:07.788166", + "startedAt": "2022-07-30T13:56:04.284610", + "docker": null, + "cpu_count": 96, + "cuda": null, + "args": [ + "--model_name_or_path=facebook/wav2vec2-xls-r-1b", + "--hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst", + "--tokenizer_name=./", + "--output_dir=./", + "--overwrite_output_dir", + "--num_train_epochs=40", + "--per_device_train_batch_size=12", + "--per_device_eval_batch_size=12", + "--gradient_accumulation_steps=1", + "--precision=full_mixed", + "--learning_rate=1e-4", + "--warmup_steps=4000", + "--length_column_name=input_length", + "--evaluation_strategy=steps", + "--text_column_name=text", + "--save_steps=1000", + "--eval_steps=1000", + "--logging_steps=100", + "--layerdrop=0.041", + "--attention_dropout=0.094", + "--activation_dropout=0.055", + "--hidden_dropout=0.047", + "--save_total_limit=5", + "--freeze_feature_encoder", + "--feat_proj_dropout=0.04", + 
"--mask_time_prob=0.082", + "--mask_time_length=10", + "--mask_feature_prob=0.25", + "--mask_feature_length=64", + "--gradient_checkpointing", + "--min_duration_in_seconds=0.5", + "--max_duration_in_seconds=20.0", + "--use_auth_token", + "--seed=42", + "--group_by_length", + "--do_train", + "--do_eval", + "--push_to_hub", + "--preprocessing_num_workers=32", + "--ctc_zero_infinity", + "--do_lower_case", + "--wandb_project=wav2vec2", + "--wandb_name=wav2vec2-1b-npsc-nst", + "--remove_punctuation" + ], + "state": "running", + "program": "run_flax_speech_recognition_ctc.py", + "codePath": "run_flax_speech_recognition_ctc.py", + "git": { + "remote": "https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst", + "commit": "63f0838b605b109a08e90f07fe84d6a94047f139" + }, + "email": "versae@gmail.com", + "root": "/data/wav2vec2-1b-npsc-nst", + "host": "t1v-n-eedfb410-w-0", + "username": "javierr", + "executable": "/data/flax/bin/python" +} diff --git a/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json b/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..4f4b362e2c3616387913e1c94c73c52192f52605 --- /dev/null +++ b/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json @@ -0,0 +1 @@ +{"train/grad_norm": 11.5, "layer_grad_norm/": {"lm_head": {"bias": 0.099609375, "kernel": 1.8125}, "wav2vec2": {"encoder": {"layer_norm": {"bias": 0.0751953125, "scale": 0.06640625}, "layers": {"0": {"attention": {"k_proj": {"bias": 0.000339508056640625, "kernel": 0.2158203125}, "out_proj": {"bias": 0.10205078125, "kernel": 0.9921875}, "q_proj": {"bias": 0.018798828125, "kernel": 0.2412109375}, "v_proj": {"bias": 0.08056640625, "kernel": 0.6875}}, "feed_forward": {"intermediate_dense": {"bias": 0.1142578125, "kernel": 1.5078125}, "output_dense": {"bias": 0.058349609375, "kernel": 1.296875}}, "final_layer_norm": {"bias": 0.27734375, "scale": 0.453125}, "layer_norm": {"bias": 0.1513671875, "scale": 0.34375}}, 
"1": {"attention": {"k_proj": {"bias": 0.00014495849609375, "kernel": 0.10107421875}, "out_proj": {"bias": 0.0673828125, "kernel": 0.7265625}, "q_proj": {"bias": 0.0091552734375, "kernel": 0.1064453125}, "v_proj": {"bias": 0.0947265625, "kernel": 0.609375}}, "feed_forward": {"intermediate_dense": {"bias": 0.076171875, "kernel": 1.078125}, "output_dense": {"bias": 0.0625, "kernel": 0.953125}}, "final_layer_norm": {"bias": 0.134765625, "scale": 0.13671875}, "layer_norm": {"bias": 0.142578125, "scale": 0.1103515625}}, "10": {"attention": {"k_proj": {"bias": 8.392333984375e-05, "kernel": 0.228515625}, "out_proj": {"bias": 0.052978515625, "kernel": 0.578125}, "q_proj": {"bias": 0.014404296875, "kernel": 0.244140625}, "v_proj": {"bias": 0.078125, "kernel": 0.703125}}, "feed_forward": {"intermediate_dense": {"bias": 0.0732421875, "kernel": 1.078125}, "output_dense": {"bias": 0.051513671875, "kernel": 0.84375}}, "final_layer_norm": {"bias": 0.1201171875, "scale": 0.09033203125}, "layer_norm": {"bias": 0.140625, "scale": 0.0859375}}, "11": {"attention": {"k_proj": {"bias": 0.00010013580322265625, "kernel": 0.25}, "out_proj": {"bias": 0.0498046875, "kernel": 0.67578125}, "q_proj": {"bias": 0.015869140625, "kernel": 0.2451171875}, "v_proj": {"bias": 0.080078125, "kernel": 0.8125}}, "feed_forward": {"intermediate_dense": {"bias": 0.0673828125, "kernel": 1.046875}, "output_dense": {"bias": 0.048828125, "kernel": 0.7890625}}, "final_layer_norm": {"bias": 0.10693359375, "scale": 0.087890625}, "layer_norm": {"bias": 0.138671875, "scale": 0.10546875}}, "12": {"attention": {"k_proj": {"bias": 8.630752563476562e-05, "kernel": 0.23828125}, "out_proj": {"bias": 0.04931640625, "kernel": 0.578125}, "q_proj": {"bias": 0.0146484375, "kernel": 0.23046875}, "v_proj": {"bias": 0.076171875, "kernel": 0.72265625}}, "feed_forward": {"intermediate_dense": {"bias": 0.0732421875, "kernel": 1.0546875}, "output_dense": {"bias": 0.047607421875, "kernel": 0.7734375}}, "final_layer_norm": {"bias": 
0.1201171875, "scale": 0.0986328125}, "layer_norm": {"bias": 0.11572265625, "scale": 0.0927734375}}, "13": {"attention": {"k_proj": {"bias": 0.00012683868408203125, "kernel": 0.275390625}, "out_proj": {"bias": 0.0498046875, "kernel": 0.66796875}, "q_proj": {"bias": 0.017578125, "kernel": 0.26953125}, "v_proj": {"bias": 0.08447265625, "kernel": 0.8671875}}, "feed_forward": {"intermediate_dense": {"bias": 0.07275390625, "kernel": 1.03125}, "output_dense": {"bias": 0.0517578125, "kernel": 0.8125}}, "final_layer_norm": {"bias": 0.11962890625, "scale": 0.09716796875}, "layer_norm": {"bias": 0.1171875, "scale": 0.1142578125}}, "14": {"attention": {"k_proj": {"bias": 0.0002002716064453125, "kernel": 0.224609375}, "out_proj": {"bias": 0.0498046875, "kernel": 0.65625}, "q_proj": {"bias": 0.014404296875, "kernel": 0.224609375}, "v_proj": {"bias": 0.0791015625, "kernel": 0.81640625}}, "feed_forward": {"intermediate_dense": {"bias": 0.07568359375, "kernel": 1.0703125}, "output_dense": {"bias": 0.05029296875, "kernel": 0.87109375}}, "final_layer_norm": {"bias": 0.1259765625, "scale": 0.1279296875}, "layer_norm": {"bias": 0.11376953125, "scale": 0.1005859375}}, "15": {"attention": {"k_proj": {"bias": 0.00020694732666015625, "kernel": 0.30078125}, "out_proj": {"bias": 0.050537109375, "kernel": 0.86328125}, "q_proj": {"bias": 0.018310546875, "kernel": 0.28125}, "v_proj": {"bias": 0.0830078125, "kernel": 0.875}}, "feed_forward": {"intermediate_dense": {"bias": 0.07080078125, "kernel": 0.96875}, "output_dense": {"bias": 0.052001953125, "kernel": 0.83203125}}, "final_layer_norm": {"bias": 0.11962890625, "scale": 0.130859375}, "layer_norm": {"bias": 0.119140625, "scale": 0.1123046875}}, "16": {"attention": {"k_proj": {"bias": 0.00014400482177734375, "kernel": 0.298828125}, "out_proj": {"bias": 0.05078125, "kernel": 0.6328125}, "q_proj": {"bias": 0.018310546875, "kernel": 0.296875}, "v_proj": {"bias": 0.0810546875, "kernel": 0.76953125}}, "feed_forward": {"intermediate_dense": {"bias": 
0.0693359375, "kernel": 0.98046875}, "output_dense": {"bias": 0.048828125, "kernel": 0.859375}}, "final_layer_norm": {"bias": 0.11279296875, "scale": 0.10546875}, "layer_norm": {"bias": 0.12109375, "scale": 0.189453125}}, "17": {"attention": {"k_proj": {"bias": 0.000141143798828125, "kernel": 0.275390625}, "out_proj": {"bias": 0.0546875, "kernel": 0.59765625}, "q_proj": {"bias": 0.017333984375, "kernel": 0.275390625}, "v_proj": {"bias": 0.083984375, "kernel": 0.734375}}, "feed_forward": {"intermediate_dense": {"bias": 0.07421875, "kernel": 1.046875}, "output_dense": {"bias": 0.052978515625, "kernel": 0.8359375}}, "final_layer_norm": {"bias": 0.1201171875, "scale": 0.12060546875}, "layer_norm": {"bias": 0.126953125, "scale": 0.1328125}}, "18": {"attention": {"k_proj": {"bias": 0.000148773193359375, "kernel": 0.33203125}, "out_proj": {"bias": 0.04931640625, "kernel": 0.67578125}, "q_proj": {"bias": 0.01904296875, "kernel": 0.310546875}, "v_proj": {"bias": 0.0791015625, "kernel": 0.7265625}}, "feed_forward": {"intermediate_dense": {"bias": 0.0673828125, "kernel": 1.0078125}, "output_dense": {"bias": 0.044921875, "kernel": 0.84375}}, "final_layer_norm": {"bias": 0.10986328125, "scale": 0.0927734375}, "layer_norm": {"bias": 0.119140625, "scale": 0.11865234375}}, "19": {"attention": {"k_proj": {"bias": 0.00011110305786132812, "kernel": 0.2314453125}, "out_proj": {"bias": 0.046875, "kernel": 0.515625}, "q_proj": {"bias": 0.0145263671875, "kernel": 0.251953125}, "v_proj": {"bias": 0.0693359375, "kernel": 0.6171875}}, "feed_forward": {"intermediate_dense": {"bias": 0.0595703125, "kernel": 0.9453125}, "output_dense": {"bias": 0.0439453125, "kernel": 0.8203125}}, "final_layer_norm": {"bias": 0.0947265625, "scale": 0.0771484375}, "layer_norm": {"bias": 0.0986328125, "scale": 0.10302734375}}, "2": {"attention": {"k_proj": {"bias": 0.0001392364501953125, "kernel": 0.1484375}, "out_proj": {"bias": 0.07666015625, "kernel": 0.7578125}, "q_proj": {"bias": 0.01312255859375, "kernel": 
0.15234375}, "v_proj": {"bias": 0.1220703125, "kernel": 0.87109375}}, "feed_forward": {"intermediate_dense": {"bias": 0.08544921875, "kernel": 1.3671875}, "output_dense": {"bias": 0.0703125, "kernel": 1.015625}}, "final_layer_norm": {"bias": 0.14453125, "scale": 0.1220703125}, "layer_norm": {"bias": 0.1611328125, "scale": 0.1435546875}}, "20": {"attention": {"k_proj": {"bias": 6.008148193359375e-05, "kernel": 0.1796875}, "out_proj": {"bias": 0.0478515625, "kernel": 0.357421875}, "q_proj": {"bias": 0.012451171875, "kernel": 0.2275390625}, "v_proj": {"bias": 0.0673828125, "kernel": 0.44921875}}, "feed_forward": {"intermediate_dense": {"bias": 0.06103515625, "kernel": 1.0}, "output_dense": {"bias": 0.0458984375, "kernel": 0.8359375}}, "final_layer_norm": {"bias": 0.0966796875, "scale": 0.08544921875}, "layer_norm": {"bias": 0.0966796875, "scale": 0.0859375}}, "21": {"attention": {"k_proj": {"bias": 0.00010013580322265625, "kernel": 0.203125}, "out_proj": {"bias": 0.0478515625, "kernel": 0.515625}, "q_proj": {"bias": 0.01239013671875, "kernel": 0.228515625}, "v_proj": {"bias": 0.06689453125, "kernel": 0.58203125}}, "feed_forward": {"intermediate_dense": {"bias": 0.060302734375, "kernel": 1.0}, "output_dense": {"bias": 0.047119140625, "kernel": 0.83203125}}, "final_layer_norm": {"bias": 0.095703125, "scale": 0.0859375}, "layer_norm": {"bias": 0.08935546875, "scale": 0.0849609375}}, "22": {"attention": {"k_proj": {"bias": 6.628036499023438e-05, "kernel": 0.20703125}, "out_proj": {"bias": 0.0517578125, "kernel": 0.43359375}, "q_proj": {"bias": 0.013671875, "kernel": 0.248046875}, "v_proj": {"bias": 0.07177734375, "kernel": 0.51953125}}, "feed_forward": {"intermediate_dense": {"bias": 0.0673828125, "kernel": 1.0625}, "output_dense": {"bias": 0.052978515625, "kernel": 0.828125}}, "final_layer_norm": {"bias": 0.1103515625, "scale": 0.123046875}, "layer_norm": {"bias": 0.1044921875, "scale": 0.08935546875}}, "23": {"attention": {"k_proj": {"bias": 0.000152587890625, "kernel": 
0.2734375}, "out_proj": {"bias": 0.05712890625, "kernel": 0.7421875}, "q_proj": {"bias": 0.0167236328125, "kernel": 0.283203125}, "v_proj": {"bias": 0.08447265625, "kernel": 0.8046875}}, "feed_forward": {"intermediate_dense": {"bias": 0.0693359375, "kernel": 1.0703125}, "output_dense": {"bias": 0.056640625, "kernel": 0.7890625}}, "final_layer_norm": {"bias": 0.1103515625, "scale": 0.12158203125}, "layer_norm": {"bias": 0.119140625, "scale": 0.150390625}}, "24": {"attention": {"k_proj": {"bias": 0.00010776519775390625, "kernel": 0.251953125}, "out_proj": {"bias": 0.04833984375, "kernel": 0.57421875}, "q_proj": {"bias": 0.0166015625, "kernel": 0.265625}, "v_proj": {"bias": 0.083984375, "kernel": 0.6875}}, "feed_forward": {"intermediate_dense": {"bias": 0.06201171875, "kernel": 0.96875}, "output_dense": {"bias": 0.045166015625, "kernel": 0.71875}}, "final_layer_norm": {"bias": 0.1015625, "scale": 0.10546875}, "layer_norm": {"bias": 0.138671875, "scale": 0.099609375}}, "25": {"attention": {"k_proj": {"bias": 0.00013446807861328125, "kernel": 0.228515625}, "out_proj": {"bias": 0.046630859375, "kernel": 0.5859375}, "q_proj": {"bias": 0.015869140625, "kernel": 0.240234375}, "v_proj": {"bias": 0.07421875, "kernel": 0.66796875}}, "feed_forward": {"intermediate_dense": {"bias": 0.058837890625, "kernel": 0.9296875}, "output_dense": {"bias": 0.044921875, "kernel": 0.6796875}}, "final_layer_norm": {"bias": 0.1005859375, "scale": 0.126953125}, "layer_norm": {"bias": 0.111328125, "scale": 0.1376953125}}, "26": {"attention": {"k_proj": {"bias": 0.00011539459228515625, "kernel": 0.240234375}, "out_proj": {"bias": 0.04443359375, "kernel": 0.56640625}, "q_proj": {"bias": 0.0159912109375, "kernel": 0.265625}, "v_proj": {"bias": 0.07177734375, "kernel": 0.64453125}}, "feed_forward": {"intermediate_dense": {"bias": 0.05810546875, "kernel": 0.859375}, "output_dense": {"bias": 0.044921875, "kernel": 0.6953125}}, "final_layer_norm": {"bias": 0.095703125, "scale": 0.0888671875}, 
"layer_norm": {"bias": 0.1005859375, "scale": 0.09375}}, "27": {"attention": {"k_proj": {"bias": 0.0001506805419921875, "kernel": 0.279296875}, "out_proj": {"bias": 0.0400390625, "kernel": 0.63671875}, "q_proj": {"bias": 0.016357421875, "kernel": 0.287109375}, "v_proj": {"bias": 0.06640625, "kernel": 0.6640625}}, "feed_forward": {"intermediate_dense": {"bias": 0.053955078125, "kernel": 0.8046875}, "output_dense": {"bias": 0.041259765625, "kernel": 0.6796875}}, "final_layer_norm": {"bias": 0.091796875, "scale": 0.08349609375}, "layer_norm": {"bias": 0.1044921875, "scale": 0.0732421875}}, "28": {"attention": {"k_proj": {"bias": 0.00016498565673828125, "kernel": 0.228515625}, "out_proj": {"bias": 0.037109375, "kernel": 0.65625}, "q_proj": {"bias": 0.014404296875, "kernel": 0.25}, "v_proj": {"bias": 0.057861328125, "kernel": 0.640625}}, "feed_forward": {"intermediate_dense": {"bias": 0.051025390625, "kernel": 0.8046875}, "output_dense": {"bias": 0.03857421875, "kernel": 0.6875}}, "final_layer_norm": {"bias": 0.0849609375, "scale": 0.0869140625}, "layer_norm": {"bias": 0.09423828125, "scale": 0.14453125}}, "29": {"attention": {"k_proj": {"bias": 0.000110626220703125, "kernel": 0.220703125}, "out_proj": {"bias": 0.034423828125, "kernel": 0.52734375}, "q_proj": {"bias": 0.01220703125, "kernel": 0.23828125}, "v_proj": {"bias": 0.0537109375, "kernel": 0.5703125}}, "feed_forward": {"intermediate_dense": {"bias": 0.046875, "kernel": 0.84765625}, "output_dense": {"bias": 0.03271484375, "kernel": 0.66015625}}, "final_layer_norm": {"bias": 0.0712890625, "scale": 0.068359375}, "layer_norm": {"bias": 0.0927734375, "scale": 0.1044921875}}, "3": {"attention": {"k_proj": {"bias": 0.0001964569091796875, "kernel": 0.271484375}, "out_proj": {"bias": 0.0771484375, "kernel": 0.98046875}, "q_proj": {"bias": 0.02001953125, "kernel": 0.259765625}, "v_proj": {"bias": 0.1220703125, "kernel": 1.1015625}}, "feed_forward": {"intermediate_dense": {"bias": 0.091796875, "kernel": 1.453125}, 
"output_dense": {"bias": 0.0712890625, "kernel": 1.03125}}, "final_layer_norm": {"bias": 0.16015625, "scale": 0.1376953125}, "layer_norm": {"bias": 0.171875, "scale": 0.2041015625}}, "30": {"attention": {"k_proj": {"bias": 0.0001239776611328125, "kernel": 0.265625}, "out_proj": {"bias": 0.0322265625, "kernel": 0.53515625}, "q_proj": {"bias": 0.013916015625, "kernel": 0.291015625}, "v_proj": {"bias": 0.04833984375, "kernel": 0.578125}}, "feed_forward": {"intermediate_dense": {"bias": 0.04443359375, "kernel": 0.828125}, "output_dense": {"bias": 0.03125, "kernel": 0.57421875}}, "final_layer_norm": {"bias": 0.06689453125, "scale": 0.068359375}, "layer_norm": {"bias": 0.06884765625, "scale": 0.08154296875}}, "31": {"attention": {"k_proj": {"bias": 0.000133514404296875, "kernel": 0.2578125}, "out_proj": {"bias": 0.029541015625, "kernel": 0.52734375}, "q_proj": {"bias": 0.0140380859375, "kernel": 0.27734375}, "v_proj": {"bias": 0.04443359375, "kernel": 0.5625}}, "feed_forward": {"intermediate_dense": {"bias": 0.03955078125, "kernel": 0.734375}, "output_dense": {"bias": 0.0286865234375, "kernel": 0.5390625}}, "final_layer_norm": {"bias": 0.0615234375, "scale": 0.05810546875}, "layer_norm": {"bias": 0.0673828125, "scale": 0.09521484375}}, "32": {"attention": {"k_proj": {"bias": 0.00010204315185546875, "kernel": 0.2099609375}, "out_proj": {"bias": 0.0267333984375, "kernel": 0.4296875}, "q_proj": {"bias": 0.01165771484375, "kernel": 0.2333984375}, "v_proj": {"bias": 0.0380859375, "kernel": 0.46484375}}, "feed_forward": {"intermediate_dense": {"bias": 0.037109375, "kernel": 0.7109375}, "output_dense": {"bias": 0.025146484375, "kernel": 0.4921875}}, "final_layer_norm": {"bias": 0.058349609375, "scale": 0.05517578125}, "layer_norm": {"bias": 0.05615234375, "scale": 0.0712890625}}, "33": {"attention": {"k_proj": {"bias": 0.00010395050048828125, "kernel": 0.23828125}, "out_proj": {"bias": 0.0240478515625, "kernel": 0.447265625}, "q_proj": {"bias": 0.01312255859375, "kernel": 
0.26953125}, "v_proj": {"bias": 0.03564453125, "kernel": 0.474609375}}, "feed_forward": {"intermediate_dense": {"bias": 0.03369140625, "kernel": 0.640625}, "output_dense": {"bias": 0.02294921875, "kernel": 0.4765625}}, "final_layer_norm": {"bias": 0.055419921875, "scale": 0.0634765625}, "layer_norm": {"bias": 0.051025390625, "scale": 0.06396484375}}, "34": {"attention": {"k_proj": {"bias": 0.00010824203491210938, "kernel": 0.2021484375}, "out_proj": {"bias": 0.0198974609375, "kernel": 0.4375}, "q_proj": {"bias": 0.0103759765625, "kernel": 0.216796875}, "v_proj": {"bias": 0.0291748046875, "kernel": 0.4296875}}, "feed_forward": {"intermediate_dense": {"bias": 0.02734375, "kernel": 0.5390625}, "output_dense": {"bias": 0.01904296875, "kernel": 0.451171875}}, "final_layer_norm": {"bias": 0.043701171875, "scale": 0.04541015625}, "layer_norm": {"bias": 0.044921875, "scale": 0.054443359375}}, "35": {"attention": {"k_proj": {"bias": 0.00012969970703125, "kernel": 0.1513671875}, "out_proj": {"bias": 0.017822265625, "kernel": 0.447265625}, "q_proj": {"bias": 0.007568359375, "kernel": 0.169921875}, "v_proj": {"bias": 0.0235595703125, "kernel": 0.38671875}}, "feed_forward": {"intermediate_dense": {"bias": 0.024169921875, "kernel": 0.466796875}, "output_dense": {"bias": 0.01806640625, "kernel": 0.416015625}}, "final_layer_norm": {"bias": 0.0380859375, "scale": 0.037109375}, "layer_norm": {"bias": 0.037109375, "scale": 0.04150390625}}, "36": {"attention": {"k_proj": {"bias": 6.771087646484375e-05, "kernel": 0.12890625}, "out_proj": {"bias": 0.0174560546875, "kernel": 0.396484375}, "q_proj": {"bias": 0.0062255859375, "kernel": 0.134765625}, "v_proj": {"bias": 0.0225830078125, "kernel": 0.33984375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0240478515625, "kernel": 0.455078125}, "output_dense": {"bias": 0.017822265625, "kernel": 0.3828125}}, "final_layer_norm": {"bias": 0.038330078125, "scale": 0.03466796875}, "layer_norm": {"bias": 0.035400390625, "scale": 
0.026123046875}}, "37": {"attention": {"k_proj": {"bias": 6.246566772460938e-05, "kernel": 0.125}, "out_proj": {"bias": 0.01708984375, "kernel": 0.4140625}, "q_proj": {"bias": 0.00634765625, "kernel": 0.13671875}, "v_proj": {"bias": 0.0238037109375, "kernel": 0.375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0234375, "kernel": 0.45703125}, "output_dense": {"bias": 0.0169677734375, "kernel": 0.376953125}}, "final_layer_norm": {"bias": 0.03759765625, "scale": 0.03564453125}, "layer_norm": {"bias": 0.041259765625, "scale": 0.033203125}}, "38": {"attention": {"k_proj": {"bias": 6.4849853515625e-05, "kernel": 0.12158203125}, "out_proj": {"bias": 0.015869140625, "kernel": 0.40234375}, "q_proj": {"bias": 0.0054931640625, "kernel": 0.123046875}, "v_proj": {"bias": 0.0224609375, "kernel": 0.3671875}}, "feed_forward": {"intermediate_dense": {"bias": 0.023681640625, "kernel": 0.47265625}, "output_dense": {"bias": 0.015869140625, "kernel": 0.39453125}}, "final_layer_norm": {"bias": 0.03857421875, "scale": 0.040771484375}, "layer_norm": {"bias": 0.03759765625, "scale": 0.0303955078125}}, "39": {"attention": {"k_proj": {"bias": 6.341934204101562e-05, "kernel": 0.11865234375}, "out_proj": {"bias": 0.01416015625, "kernel": 0.39453125}, "q_proj": {"bias": 0.00567626953125, "kernel": 0.1328125}, "v_proj": {"bias": 0.0196533203125, "kernel": 0.333984375}}, "feed_forward": {"intermediate_dense": {"bias": 0.02294921875, "kernel": 0.484375}, "output_dense": {"bias": 0.01409912109375, "kernel": 0.4765625}}, "final_layer_norm": {"bias": 0.03662109375, "scale": 0.033447265625}, "layer_norm": {"bias": 0.03369140625, "scale": 0.0255126953125}}, "4": {"attention": {"k_proj": {"bias": 0.00023174285888671875, "kernel": 0.298828125}, "out_proj": {"bias": 0.0732421875, "kernel": 1.109375}, "q_proj": {"bias": 0.02099609375, "kernel": 0.298828125}, "v_proj": {"bias": 0.11328125, "kernel": 1.203125}}, "feed_forward": {"intermediate_dense": {"bias": 0.0927734375, "kernel": 1.3828125}, 
"output_dense": {"bias": 0.072265625, "kernel": 1.015625}}, "final_layer_norm": {"bias": 0.1484375, "scale": 0.146484375}, "layer_norm": {"bias": 0.1572265625, "scale": 0.12255859375}}, "40": {"attention": {"k_proj": {"bias": 3.9577484130859375e-05, "kernel": 0.0888671875}, "out_proj": {"bias": 0.013671875, "kernel": 0.3671875}, "q_proj": {"bias": 0.003875732421875, "kernel": 0.09619140625}, "v_proj": {"bias": 0.0189208984375, "kernel": 0.33984375}}, "feed_forward": {"intermediate_dense": {"bias": 0.02294921875, "kernel": 0.49609375}, "output_dense": {"bias": 0.013916015625, "kernel": 0.39453125}}, "final_layer_norm": {"bias": 0.038818359375, "scale": 0.0390625}, "layer_norm": {"bias": 0.030517578125, "scale": 0.025390625}}, "41": {"attention": {"k_proj": {"bias": 4.4345855712890625e-05, "kernel": 0.09912109375}, "out_proj": {"bias": 0.01226806640625, "kernel": 0.34765625}, "q_proj": {"bias": 0.00439453125, "kernel": 0.1103515625}, "v_proj": {"bias": 0.0189208984375, "kernel": 0.375}}, "feed_forward": {"intermediate_dense": {"bias": 0.019775390625, "kernel": 0.478515625}, "output_dense": {"bias": 0.0125732421875, "kernel": 0.40625}}, "final_layer_norm": {"bias": 0.03466796875, "scale": 0.035400390625}, "layer_norm": {"bias": 0.0311279296875, "scale": 0.037841796875}}, "42": {"attention": {"k_proj": {"bias": 3.0994415283203125e-05, "kernel": 0.06005859375}, "out_proj": {"bias": 0.0123291015625, "kernel": 0.30078125}, "q_proj": {"bias": 0.002777099609375, "kernel": 0.068359375}, "v_proj": {"bias": 0.01611328125, "kernel": 0.302734375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0177001953125, "kernel": 0.44921875}, "output_dense": {"bias": 0.012939453125, "kernel": 0.337890625}}, "final_layer_norm": {"bias": 0.028076171875, "scale": 0.028076171875}, "layer_norm": {"bias": 0.02490234375, "scale": 0.02880859375}}, "43": {"attention": {"k_proj": {"bias": 1.811981201171875e-05, "kernel": 0.0400390625}, "out_proj": {"bias": 0.01336669921875, "kernel": 0.2578125}, 
"q_proj": {"bias": 0.00183868408203125, "kernel": 0.04248046875}, "v_proj": {"bias": 0.0159912109375, "kernel": 0.275390625}}, "feed_forward": {"intermediate_dense": {"bias": 0.019775390625, "kernel": 0.5234375}, "output_dense": {"bias": 0.014404296875, "kernel": 0.3515625}}, "final_layer_norm": {"bias": 0.032470703125, "scale": 0.0303955078125}, "layer_norm": {"bias": 0.029052734375, "scale": 0.029296875}}, "44": {"attention": {"k_proj": {"bias": 1.52587890625e-05, "kernel": 0.041015625}, "out_proj": {"bias": 0.0142822265625, "kernel": 0.28125}, "q_proj": {"bias": 0.00186920166015625, "kernel": 0.04296875}, "v_proj": {"bias": 0.017578125, "kernel": 0.310546875}}, "feed_forward": {"intermediate_dense": {"bias": 0.018310546875, "kernel": 0.515625}, "output_dense": {"bias": 0.0155029296875, "kernel": 0.3203125}}, "final_layer_norm": {"bias": 0.0284423828125, "scale": 0.025390625}, "layer_norm": {"bias": 0.03369140625, "scale": 0.0263671875}}, "45": {"attention": {"k_proj": {"bias": 1.5497207641601562e-05, "kernel": 0.03955078125}, "out_proj": {"bias": 0.0146484375, "kernel": 0.265625}, "q_proj": {"bias": 0.001953125, "kernel": 0.042236328125}, "v_proj": {"bias": 0.0185546875, "kernel": 0.3046875}}, "feed_forward": {"intermediate_dense": {"bias": 0.015869140625, "kernel": 0.4375}, "output_dense": {"bias": 0.01556396484375, "kernel": 0.27734375}}, "final_layer_norm": {"bias": 0.025146484375, "scale": 0.0228271484375}, "layer_norm": {"bias": 0.0400390625, "scale": 0.0294189453125}}, "46": {"attention": {"k_proj": {"bias": 1.633167266845703e-05, "kernel": 0.03955078125}, "out_proj": {"bias": 0.0142822265625, "kernel": 0.2578125}, "q_proj": {"bias": 0.0018463134765625, "kernel": 0.03857421875}, "v_proj": {"bias": 0.0196533203125, "kernel": 0.3203125}}, "feed_forward": {"intermediate_dense": {"bias": 0.0125732421875, "kernel": 0.31640625}, "output_dense": {"bias": 0.0140380859375, "kernel": 0.244140625}}, "final_layer_norm": {"bias": 0.01904296875, "scale": 
0.0223388671875}, "layer_norm": {"bias": 0.051025390625, "scale": 0.036865234375}}, "47": {"attention": {"k_proj": {"bias": 1.4424324035644531e-05, "kernel": 0.04736328125}, "out_proj": {"bias": 0.013916015625, "kernel": 0.193359375}, "q_proj": {"bias": 0.0025177001953125, "kernel": 0.04248046875}, "v_proj": {"bias": 0.024169921875, "kernel": 0.3359375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0103759765625, "kernel": 0.203125}, "output_dense": {"bias": 0.0125732421875, "kernel": 0.181640625}}, "final_layer_norm": {"bias": 0.020751953125, "scale": 0.0184326171875}, "layer_norm": {"bias": 0.06396484375, "scale": 0.044921875}}, "5": {"attention": {"k_proj": {"bias": 0.0001220703125, "kernel": 0.271484375}, "out_proj": {"bias": 0.07421875, "kernel": 0.77734375}, "q_proj": {"bias": 0.0185546875, "kernel": 0.283203125}, "v_proj": {"bias": 0.1162109375, "kernel": 0.953125}}, "feed_forward": {"intermediate_dense": {"bias": 0.09375, "kernel": 1.3203125}, "output_dense": {"bias": 0.076171875, "kernel": 1.0}}, "final_layer_norm": {"bias": 0.158203125, "scale": 0.125}, "layer_norm": {"bias": 0.171875, "scale": 0.15234375}}, "6": {"attention": {"k_proj": {"bias": 0.000164031982421875, "kernel": 0.3203125}, "out_proj": {"bias": 0.068359375, "kernel": 0.9453125}, "q_proj": {"bias": 0.021240234375, "kernel": 0.3203125}, "v_proj": {"bias": 0.11865234375, "kernel": 1.125}}, "feed_forward": {"intermediate_dense": {"bias": 0.0849609375, "kernel": 1.265625}, "output_dense": {"bias": 0.0703125, "kernel": 0.91796875}}, "final_layer_norm": {"bias": 0.1396484375, "scale": 0.1142578125}, "layer_norm": {"bias": 0.16796875, "scale": 0.15625}}, "7": {"attention": {"k_proj": {"bias": 0.00028228759765625, "kernel": 0.30078125}, "out_proj": {"bias": 0.06787109375, "kernel": 0.953125}, "q_proj": {"bias": 0.0201416015625, "kernel": 0.296875}, "v_proj": {"bias": 0.1064453125, "kernel": 1.0625}}, "feed_forward": {"intermediate_dense": {"bias": 0.08251953125, "kernel": 1.28125}, 
"output_dense": {"bias": 0.0673828125, "kernel": 0.921875}}, "final_layer_norm": {"bias": 0.1357421875, "scale": 0.11962890625}, "layer_norm": {"bias": 0.1689453125, "scale": 0.12890625}}, "8": {"attention": {"k_proj": {"bias": 0.0001583099365234375, "kernel": 0.2890625}, "out_proj": {"bias": 0.06396484375, "kernel": 0.84375}, "q_proj": {"bias": 0.0184326171875, "kernel": 0.283203125}, "v_proj": {"bias": 0.10546875, "kernel": 1.0}}, "feed_forward": {"intermediate_dense": {"bias": 0.08056640625, "kernel": 1.234375}, "output_dense": {"bias": 0.06201171875, "kernel": 0.8984375}}, "final_layer_norm": {"bias": 0.138671875, "scale": 0.126953125}, "layer_norm": {"bias": 0.162109375, "scale": 0.1796875}}, "9": {"attention": {"k_proj": {"bias": 0.000194549560546875, "kernel": 0.302734375}, "out_proj": {"bias": 0.054931640625, "kernel": 0.984375}, "q_proj": {"bias": 0.0174560546875, "kernel": 0.30078125}, "v_proj": {"bias": 0.08837890625, "kernel": 1.09375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0703125, "kernel": 1.125}, "output_dense": {"bias": 0.0556640625, "kernel": 0.8828125}}, "final_layer_norm": {"bias": 0.1162109375, "scale": 0.1240234375}, "layer_norm": {"bias": 0.140625, "scale": 0.095703125}}}, "pos_conv_embed": {"conv": {"bias": 0.130859375, "weight_g": 0.072265625, "weight_v": 0.91796875}}}, "feature_extractor": {"conv_layers": {"0": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "1": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "2": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "3": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "4": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "5": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "6": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}}}, 
"feature_projection": {"layer_norm": {"bias": 0.2490234375, "scale": 0.3671875}, "projection": {"bias": 0.1611328125, "kernel": 2.9375}}, "masked_spec_embed": 0.0}}, "layer_param_norm/": {"lm_head": {"bias": 0.006227790843695402, "kernel": 4.543642997741699}, "wav2vec2": {"encoder": {"layer_norm": {"bias": 0.8080048561096191, "scale": 22.27030372619629}, "layers": {"0": {"attention": {"k_proj": {"bias": 0.017717748880386353, "kernel": 25.907127380371094}, "out_proj": {"bias": 1.5421152114868164, "kernel": 25.076631546020508}, "q_proj": {"bias": 1.2994158267974854, "kernel": 26.18454360961914}, "v_proj": {"bias": 0.3453245759010315, "kernel": 25.80449676513672}}, "feed_forward": {"intermediate_dense": {"bias": 1.7608319520950317, "kernel": 95.11439514160156}, "output_dense": {"bias": 1.0213779211044312, "kernel": 90.89668273925781}}, "final_layer_norm": {"bias": 1.2822446823120117, "scale": 19.880083084106445}, "layer_norm": {"bias": 3.290161609649658, "scale": 16.032001495361328}}, "1": {"attention": {"k_proj": {"bias": 0.017420589923858643, "kernel": 40.236366271972656}, "out_proj": {"bias": 1.2911627292633057, "kernel": 41.64336395263672}, "q_proj": {"bias": 2.8566694259643555, "kernel": 40.07891082763672}, "v_proj": {"bias": 0.2825187146663666, "kernel": 40.126407623291016}}, "feed_forward": {"intermediate_dense": {"bias": 1.577695608139038, "kernel": 93.2037124633789}, "output_dense": {"bias": 0.8034582138061523, "kernel": 84.11729431152344}}, "final_layer_norm": {"bias": 1.1328635215759277, "scale": 18.407102584838867}, "layer_norm": {"bias": 1.73884916305542, "scale": 19.37253189086914}}, "10": {"attention": {"k_proj": {"bias": 0.03383120149374008, "kernel": 47.28413391113281}, "out_proj": {"bias": 1.2185262441635132, "kernel": 50.11650848388672}, "q_proj": {"bias": 2.4208953380584717, "kernel": 47.244537353515625}, "v_proj": {"bias": 0.31392401456832886, "kernel": 50.314903259277344}}, "feed_forward": {"intermediate_dense": {"bias": 1.622002124786377, 
"kernel": 97.558349609375}, "output_dense": {"bias": 0.5634207725524902, "kernel": 91.49876403808594}}, "final_layer_norm": {"bias": 2.1998660564422607, "scale": 20.355709075927734}, "layer_norm": {"bias": 1.6904888153076172, "scale": 22.30306625366211}}, "11": {"attention": {"k_proj": {"bias": 0.09422115236520767, "kernel": 47.070281982421875}, "out_proj": {"bias": 1.0706963539123535, "kernel": 49.301727294921875}, "q_proj": {"bias": 2.470736026763916, "kernel": 46.793678283691406}, "v_proj": {"bias": 0.3550601005554199, "kernel": 49.84492111206055}}, "feed_forward": {"intermediate_dense": {"bias": 1.67034113407135, "kernel": 98.30744934082031}, "output_dense": {"bias": 0.5463298559188843, "kernel": 93.21025848388672}}, "final_layer_norm": {"bias": 2.18017578125, "scale": 20.362842559814453}, "layer_norm": {"bias": 1.6739838123321533, "scale": 22.603429794311523}}, "12": {"attention": {"k_proj": {"bias": 0.037733033299446106, "kernel": 47.66437530517578}, "out_proj": {"bias": 1.055997610092163, "kernel": 49.61451721191406}, "q_proj": {"bias": 2.3581721782684326, "kernel": 47.41926956176758}, "v_proj": {"bias": 0.34188324213027954, "kernel": 50.03089904785156}}, "feed_forward": {"intermediate_dense": {"bias": 1.713924765586853, "kernel": 99.14901733398438}, "output_dense": {"bias": 0.5359264612197876, "kernel": 94.75711059570312}}, "final_layer_norm": {"bias": 2.1364316940307617, "scale": 20.325605392456055}, "layer_norm": {"bias": 1.7382376194000244, "scale": 23.154159545898438}}, "13": {"attention": {"k_proj": {"bias": 0.06478377431631088, "kernel": 49.548011779785156}, "out_proj": {"bias": 1.049849510192871, "kernel": 49.253868103027344}, "q_proj": {"bias": 2.3329415321350098, "kernel": 49.404293060302734}, "v_proj": {"bias": 0.3697168529033661, "kernel": 49.43449783325195}}, "feed_forward": {"intermediate_dense": {"bias": 1.7687466144561768, "kernel": 99.75472259521484}, "output_dense": {"bias": 0.5525321960449219, "kernel": 95.19114685058594}}, 
"final_layer_norm": {"bias": 2.020570993423462, "scale": 20.470088958740234}, "layer_norm": {"bias": 1.837044358253479, "scale": 23.37548065185547}}, "14": {"attention": {"k_proj": {"bias": 0.15069520473480225, "kernel": 49.753196716308594}, "out_proj": {"bias": 1.2117998600006104, "kernel": 47.694759368896484}, "q_proj": {"bias": 2.397552490234375, "kernel": 49.812904357910156}, "v_proj": {"bias": 0.37084758281707764, "kernel": 47.27647399902344}}, "feed_forward": {"intermediate_dense": {"bias": 1.8047257661819458, "kernel": 100.37411499023438}, "output_dense": {"bias": 0.5676146745681763, "kernel": 96.55570983886719}}, "final_layer_norm": {"bias": 2.1551315784454346, "scale": 20.614219665527344}, "layer_norm": {"bias": 1.9676849842071533, "scale": 23.550649642944336}}, "15": {"attention": {"k_proj": {"bias": 0.07710058987140656, "kernel": 49.88433074951172}, "out_proj": {"bias": 1.261141061782837, "kernel": 48.29498291015625}, "q_proj": {"bias": 2.5427799224853516, "kernel": 49.952571868896484}, "v_proj": {"bias": 0.40316635370254517, "kernel": 47.94293975830078}}, "feed_forward": {"intermediate_dense": {"bias": 1.8133320808410645, "kernel": 100.18331909179688}, "output_dense": {"bias": 0.7154802083969116, "kernel": 97.23637390136719}}, "final_layer_norm": {"bias": 2.079444169998169, "scale": 20.706073760986328}, "layer_norm": {"bias": 2.2165818214416504, "scale": 23.696819305419922}}, "16": {"attention": {"k_proj": {"bias": 0.03493429720401764, "kernel": 49.79267120361328}, "out_proj": {"bias": 1.1955194473266602, "kernel": 47.76436996459961}, "q_proj": {"bias": 2.6263279914855957, "kernel": 49.67594909667969}, "v_proj": {"bias": 0.358820378780365, "kernel": 47.44581604003906}}, "feed_forward": {"intermediate_dense": {"bias": 1.8111960887908936, "kernel": 100.83795166015625}, "output_dense": {"bias": 0.7391186356544495, "kernel": 98.10263061523438}}, "final_layer_norm": {"bias": 2.1532938480377197, "scale": 21.192392349243164}, "layer_norm": {"bias": 
2.1495795249938965, "scale": 22.608299255371094}}, "17": {"attention": {"k_proj": {"bias": 0.02427005022764206, "kernel": 49.99943542480469}, "out_proj": {"bias": 1.1385328769683838, "kernel": 47.08169937133789}, "q_proj": {"bias": 2.698282241821289, "kernel": 50.10108184814453}, "v_proj": {"bias": 0.39649325609207153, "kernel": 46.75300598144531}}, "feed_forward": {"intermediate_dense": {"bias": 1.8220467567443848, "kernel": 101.9210205078125}, "output_dense": {"bias": 0.7551708817481995, "kernel": 98.4967269897461}}, "final_layer_norm": {"bias": 2.2398810386657715, "scale": 21.752655029296875}, "layer_norm": {"bias": 2.0628278255462646, "scale": 22.166473388671875}}, "18": {"attention": {"k_proj": {"bias": 0.06514191627502441, "kernel": 50.276710510253906}, "out_proj": {"bias": 1.2418107986450195, "kernel": 48.10392379760742}, "q_proj": {"bias": 2.5922303199768066, "kernel": 50.66260528564453}, "v_proj": {"bias": 0.42488259077072144, "kernel": 47.63117218017578}}, "feed_forward": {"intermediate_dense": {"bias": 1.864889144897461, "kernel": 102.2223892211914}, "output_dense": {"bias": 0.8703033924102783, "kernel": 100.13847351074219}}, "final_layer_norm": {"bias": 2.3449723720550537, "scale": 21.713321685791016}, "layer_norm": {"bias": 2.2429957389831543, "scale": 23.88089370727539}}, "19": {"attention": {"k_proj": {"bias": 0.02069919742643833, "kernel": 49.551841735839844}, "out_proj": {"bias": 1.2166869640350342, "kernel": 47.99229431152344}, "q_proj": {"bias": 2.8673105239868164, "kernel": 49.98308563232422}, "v_proj": {"bias": 0.3887897729873657, "kernel": 47.23723602294922}}, "feed_forward": {"intermediate_dense": {"bias": 1.9199435710906982, "kernel": 102.80052947998047}, "output_dense": {"bias": 0.9345583915710449, "kernel": 101.04116821289062}}, "final_layer_norm": {"bias": 2.3030409812927246, "scale": 22.07358169555664}, "layer_norm": {"bias": 2.1653337478637695, "scale": 23.089134216308594}}, "2": {"attention": {"k_proj": {"bias": 0.03994838893413544, 
"kernel": 46.15595245361328}, "out_proj": {"bias": 1.2123382091522217, "kernel": 43.85425567626953}, "q_proj": {"bias": 3.044398069381714, "kernel": 45.923065185546875}, "v_proj": {"bias": 0.3091737627983093, "kernel": 43.853729248046875}}, "feed_forward": {"intermediate_dense": {"bias": 1.6172540187835693, "kernel": 98.27151489257812}, "output_dense": {"bias": 0.6916781067848206, "kernel": 87.2510986328125}}, "final_layer_norm": {"bias": 1.4531984329223633, "scale": 20.986406326293945}, "layer_norm": {"bias": 1.66593337059021, "scale": 22.054546356201172}}, "20": {"attention": {"k_proj": {"bias": 0.019117258489131927, "kernel": 49.522117614746094}, "out_proj": {"bias": 1.245902419090271, "kernel": 47.3779296875}, "q_proj": {"bias": 2.7806339263916016, "kernel": 50.30908203125}, "v_proj": {"bias": 0.36149418354034424, "kernel": 46.277198791503906}}, "feed_forward": {"intermediate_dense": {"bias": 1.9210526943206787, "kernel": 104.06356811523438}, "output_dense": {"bias": 1.0503356456756592, "kernel": 101.6682357788086}}, "final_layer_norm": {"bias": 2.3304975032806396, "scale": 23.017230987548828}, "layer_norm": {"bias": 2.1417791843414307, "scale": 23.232433319091797}}, "21": {"attention": {"k_proj": {"bias": 0.039454348385334015, "kernel": 49.967464447021484}, "out_proj": {"bias": 1.2816205024719238, "kernel": 47.4180908203125}, "q_proj": {"bias": 2.7252862453460693, "kernel": 50.81111145019531}, "v_proj": {"bias": 0.41635215282440186, "kernel": 46.52488708496094}}, "feed_forward": {"intermediate_dense": {"bias": 1.9635816812515259, "kernel": 104.25914764404297}, "output_dense": {"bias": 1.1212176084518433, "kernel": 101.99238586425781}}, "final_layer_norm": {"bias": 2.3592710494995117, "scale": 22.663631439208984}, "layer_norm": {"bias": 2.2133560180664062, "scale": 23.51139259338379}}, "22": {"attention": {"k_proj": {"bias": 0.023747840896248817, "kernel": 50.355613708496094}, "out_proj": {"bias": 1.2000508308410645, "kernel": 46.872554779052734}, "q_proj": 
{"bias": 2.8076577186584473, "kernel": 50.73884201049805}, "v_proj": {"bias": 0.3693321943283081, "kernel": 46.74058532714844}}, "feed_forward": {"intermediate_dense": {"bias": 1.8949806690216064, "kernel": 104.65501403808594}, "output_dense": {"bias": 1.1306650638580322, "kernel": 101.26948547363281}}, "final_layer_norm": {"bias": 2.244089126586914, "scale": 22.18447494506836}, "layer_norm": {"bias": 2.2082674503326416, "scale": 22.5196475982666}}, "23": {"attention": {"k_proj": {"bias": 0.12220916152000427, "kernel": 51.456703186035156}, "out_proj": {"bias": 1.3277795314788818, "kernel": 47.87244415283203}, "q_proj": {"bias": 2.6426448822021484, "kernel": 51.569129943847656}, "v_proj": {"bias": 0.5203225612640381, "kernel": 48.51310348510742}}, "feed_forward": {"intermediate_dense": {"bias": 1.872227430343628, "kernel": 104.44924926757812}, "output_dense": {"bias": 1.108591079711914, "kernel": 102.06685638427734}}, "final_layer_norm": {"bias": 2.4933314323425293, "scale": 22.13880157470703}, "layer_norm": {"bias": 2.6961421966552734, "scale": 23.728824615478516}}, "24": {"attention": {"k_proj": {"bias": 0.058006178587675095, "kernel": 49.9398193359375}, "out_proj": {"bias": 1.3834214210510254, "kernel": 49.853782653808594}, "q_proj": {"bias": 2.8019046783447266, "kernel": 49.93363952636719}, "v_proj": {"bias": 0.4747922122478485, "kernel": 49.930416107177734}}, "feed_forward": {"intermediate_dense": {"bias": 1.9901800155639648, "kernel": 103.92146301269531}, "output_dense": {"bias": 1.1459870338439941, "kernel": 104.94160461425781}}, "final_layer_norm": {"bias": 2.5980639457702637, "scale": 22.196256637573242}, "layer_norm": {"bias": 2.4199328422546387, "scale": 23.271270751953125}}, "25": {"attention": {"k_proj": {"bias": 0.0486767403781414, "kernel": 50.49137878417969}, "out_proj": {"bias": 1.1953943967819214, "kernel": 47.765968322753906}, "q_proj": {"bias": 2.877917766571045, "kernel": 50.27877426147461}, "v_proj": {"bias": 0.5565428137779236, "kernel": 
48.302940368652344}}, "feed_forward": {"intermediate_dense": {"bias": 1.8897809982299805, "kernel": 104.18898010253906}, "output_dense": {"bias": 1.0260541439056396, "kernel": 104.87144470214844}}, "final_layer_norm": {"bias": 2.3020076751708984, "scale": 22.731998443603516}, "layer_norm": {"bias": 2.5791854858398438, "scale": 22.420787811279297}}, "26": {"attention": {"k_proj": {"bias": 0.07401315867900848, "kernel": 50.69556427001953}, "out_proj": {"bias": 1.132948637008667, "kernel": 48.53791046142578}, "q_proj": {"bias": 2.8362269401550293, "kernel": 50.46051788330078}, "v_proj": {"bias": 0.4930846691131592, "kernel": 49.14529800415039}}, "feed_forward": {"intermediate_dense": {"bias": 1.9828646183013916, "kernel": 103.61874389648438}, "output_dense": {"bias": 0.9868142604827881, "kernel": 102.04234313964844}}, "final_layer_norm": {"bias": 1.9354043006896973, "scale": 21.58776092529297}, "layer_norm": {"bias": 2.483001232147217, "scale": 22.860599517822266}}, "27": {"attention": {"k_proj": {"bias": 0.37386101484298706, "kernel": 51.353492736816406}, "out_proj": {"bias": 1.360071063041687, "kernel": 49.861541748046875}, "q_proj": {"bias": 2.6180477142333984, "kernel": 51.21508026123047}, "v_proj": {"bias": 0.5687844753265381, "kernel": 50.31639099121094}}, "feed_forward": {"intermediate_dense": {"bias": 2.1435513496398926, "kernel": 101.89665985107422}, "output_dense": {"bias": 0.8681224584579468, "kernel": 101.72557067871094}}, "final_layer_norm": {"bias": 2.216545581817627, "scale": 20.85482406616211}, "layer_norm": {"bias": 2.5525119304656982, "scale": 23.54396629333496}}, "28": {"attention": {"k_proj": {"bias": 0.41085001826286316, "kernel": 52.282447814941406}, "out_proj": {"bias": 1.3871128559112549, "kernel": 50.62632751464844}, "q_proj": {"bias": 2.7654333114624023, "kernel": 51.92694854736328}, "v_proj": {"bias": 0.46052616834640503, "kernel": 50.95448684692383}}, "feed_forward": {"intermediate_dense": {"bias": 2.0904147624969482, "kernel": 
101.90587615966797}, "output_dense": {"bias": 0.770317554473877, "kernel": 103.91770935058594}}, "final_layer_norm": {"bias": 2.126486301422119, "scale": 21.1737060546875}, "layer_norm": {"bias": 2.0537118911743164, "scale": 24.406505584716797}}, "29": {"attention": {"k_proj": {"bias": 0.07059745490550995, "kernel": 48.74183654785156}, "out_proj": {"bias": 1.3648455142974854, "kernel": 53.14055633544922}, "q_proj": {"bias": 2.740316867828369, "kernel": 48.56298828125}, "v_proj": {"bias": 0.4175933599472046, "kernel": 53.0452880859375}}, "feed_forward": {"intermediate_dense": {"bias": 2.0906143188476562, "kernel": 102.57981872558594}, "output_dense": {"bias": 0.8713856935501099, "kernel": 108.18291473388672}}, "final_layer_norm": {"bias": 2.367311477661133, "scale": 22.308059692382812}, "layer_norm": {"bias": 2.15020489692688, "scale": 25.381393432617188}}, "3": {"attention": {"k_proj": {"bias": 0.12173552811145782, "kernel": 50.12483215332031}, "out_proj": {"bias": 1.3624351024627686, "kernel": 46.49520492553711}, "q_proj": {"bias": 2.719555616378784, "kernel": 50.35211181640625}, "v_proj": {"bias": 0.2993015646934509, "kernel": 46.89512634277344}}, "feed_forward": {"intermediate_dense": {"bias": 1.6325161457061768, "kernel": 99.91313171386719}, "output_dense": {"bias": 0.6524742841720581, "kernel": 90.10629272460938}}, "final_layer_norm": {"bias": 1.7124708890914917, "scale": 21.081825256347656}, "layer_norm": {"bias": 1.8278560638427734, "scale": 23.59053611755371}}, "30": {"attention": {"k_proj": {"bias": 0.2557613253593445, "kernel": 50.66333770751953}, "out_proj": {"bias": 1.1596150398254395, "kernel": 49.418575286865234}, "q_proj": {"bias": 2.8001761436462402, "kernel": 50.7453498840332}, "v_proj": {"bias": 0.4823254644870758, "kernel": 49.760719299316406}}, "feed_forward": {"intermediate_dense": {"bias": 2.026075839996338, "kernel": 103.08485412597656}, "output_dense": {"bias": 0.8237862586975098, "kernel": 107.17366790771484}}, "final_layer_norm": {"bias": 
2.191945791244507, "scale": 23.445449829101562}, "layer_norm": {"bias": 2.3006272315979004, "scale": 25.11294174194336}}, "31": {"attention": {"k_proj": {"bias": 0.3532944321632385, "kernel": 49.19044494628906}, "out_proj": {"bias": 1.0852205753326416, "kernel": 50.28578186035156}, "q_proj": {"bias": 2.582430362701416, "kernel": 49.29231262207031}, "v_proj": {"bias": 0.527869462966919, "kernel": 50.41516876220703}}, "feed_forward": {"intermediate_dense": {"bias": 2.1042604446411133, "kernel": 101.77178192138672}, "output_dense": {"bias": 1.001185417175293, "kernel": 104.5643310546875}}, "final_layer_norm": {"bias": 2.0816102027893066, "scale": 23.34256362915039}, "layer_norm": {"bias": 2.2965025901794434, "scale": 24.890090942382812}}, "32": {"attention": {"k_proj": {"bias": 0.2081139087677002, "kernel": 48.02776336669922}, "out_proj": {"bias": 1.0938222408294678, "kernel": 49.4695930480957}, "q_proj": {"bias": 2.8447506427764893, "kernel": 48.01158142089844}, "v_proj": {"bias": 0.3958088755607605, "kernel": 49.75651168823242}}, "feed_forward": {"intermediate_dense": {"bias": 2.0343356132507324, "kernel": 100.63871765136719}, "output_dense": {"bias": 1.0618953704833984, "kernel": 103.91098022460938}}, "final_layer_norm": {"bias": 2.0418777465820312, "scale": 23.778400421142578}, "layer_norm": {"bias": 2.2472598552703857, "scale": 25.151775360107422}}, "33": {"attention": {"k_proj": {"bias": 0.21029098331928253, "kernel": 47.97268295288086}, "out_proj": {"bias": 1.13039231300354, "kernel": 49.31745147705078}, "q_proj": {"bias": 2.9875826835632324, "kernel": 47.9653434753418}, "v_proj": {"bias": 0.42682555317878723, "kernel": 49.583251953125}}, "feed_forward": {"intermediate_dense": {"bias": 2.0421395301818848, "kernel": 99.01563262939453}, "output_dense": {"bias": 1.0347816944122314, "kernel": 102.68461608886719}}, "final_layer_norm": {"bias": 1.9543565511703491, "scale": 23.545602798461914}, "layer_norm": {"bias": 2.4407973289489746, "scale": 25.397212982177734}}, 
"34": {"attention": {"k_proj": {"bias": 0.22819873690605164, "kernel": 47.184783935546875}, "out_proj": {"bias": 1.3782916069030762, "kernel": 50.803382873535156}, "q_proj": {"bias": 2.8665852546691895, "kernel": 47.237979888916016}, "v_proj": {"bias": 0.3953763246536255, "kernel": 50.73908233642578}}, "feed_forward": {"intermediate_dense": {"bias": 2.1229429244995117, "kernel": 97.85118103027344}, "output_dense": {"bias": 0.9661370515823364, "kernel": 102.001708984375}}, "final_layer_norm": {"bias": 1.8967653512954712, "scale": 23.200794219970703}, "layer_norm": {"bias": 2.5211293697357178, "scale": 25.781917572021484}}, "35": {"attention": {"k_proj": {"bias": 0.35871943831443787, "kernel": 48.906837463378906}, "out_proj": {"bias": 1.2972804307937622, "kernel": 49.65650939941406}, "q_proj": {"bias": 2.615225076675415, "kernel": 49.24107360839844}, "v_proj": {"bias": 0.4798451066017151, "kernel": 49.480987548828125}}, "feed_forward": {"intermediate_dense": {"bias": 2.20389986038208, "kernel": 96.45945739746094}, "output_dense": {"bias": 0.8606913089752197, "kernel": 100.74075317382812}}, "final_layer_norm": {"bias": 1.9770451784133911, "scale": 23.324676513671875}, "layer_norm": {"bias": 2.2845458984375, "scale": 26.272663116455078}}, "36": {"attention": {"k_proj": {"bias": 0.19124102592468262, "kernel": 46.22441864013672}, "out_proj": {"bias": 1.3377137184143066, "kernel": 50.99781799316406}, "q_proj": {"bias": 2.70013689994812, "kernel": 46.213951110839844}, "v_proj": {"bias": 0.36395663022994995, "kernel": 51.181884765625}}, "feed_forward": {"intermediate_dense": {"bias": 2.07747220993042, "kernel": 95.55824279785156}, "output_dense": {"bias": 0.8954221606254578, "kernel": 100.43612670898438}}, "final_layer_norm": {"bias": 1.6182420253753662, "scale": 23.849767684936523}, "layer_norm": {"bias": 2.0086894035339355, "scale": 25.781164169311523}}, "37": {"attention": {"k_proj": {"bias": 0.5271086692810059, "kernel": 45.2613639831543}, "out_proj": {"bias": 
1.598453402519226, "kernel": 50.981468200683594}, "q_proj": {"bias": 2.3939218521118164, "kernel": 45.332435607910156}, "v_proj": {"bias": 0.3595349192619324, "kernel": 50.851402282714844}}, "feed_forward": {"intermediate_dense": {"bias": 1.972982406616211, "kernel": 94.81402587890625}, "output_dense": {"bias": 0.9043532013893127, "kernel": 100.2063217163086}}, "final_layer_norm": {"bias": 1.446416974067688, "scale": 24.250770568847656}, "layer_norm": {"bias": 1.9783923625946045, "scale": 25.818511962890625}}, "38": {"attention": {"k_proj": {"bias": 0.6130545139312744, "kernel": 43.451454162597656}, "out_proj": {"bias": 1.298864483833313, "kernel": 50.46533203125}, "q_proj": {"bias": 2.3286657333374023, "kernel": 43.462501525878906}, "v_proj": {"bias": 0.4180901050567627, "kernel": 50.337013244628906}}, "feed_forward": {"intermediate_dense": {"bias": 1.918135404586792, "kernel": 92.86172485351562}, "output_dense": {"bias": 0.892139196395874, "kernel": 98.45660400390625}}, "final_layer_norm": {"bias": 1.4936511516571045, "scale": 24.967491149902344}, "layer_norm": {"bias": 2.156099557876587, "scale": 26.533777236938477}}, "39": {"attention": {"k_proj": {"bias": 0.6435011625289917, "kernel": 43.22444152832031}, "out_proj": {"bias": 1.5929176807403564, "kernel": 50.33856201171875}, "q_proj": {"bias": 2.1112735271453857, "kernel": 43.61250686645508}, "v_proj": {"bias": 0.38804692029953003, "kernel": 50.01123046875}}, "feed_forward": {"intermediate_dense": {"bias": 1.9113759994506836, "kernel": 91.18324279785156}, "output_dense": {"bias": 0.971623420715332, "kernel": 98.84017944335938}}, "final_layer_norm": {"bias": 1.6386053562164307, "scale": 25.60051918029785}, "layer_norm": {"bias": 2.134451389312744, "scale": 27.175662994384766}}, "4": {"attention": {"k_proj": {"bias": 0.13544148206710815, "kernel": 52.686279296875}, "out_proj": {"bias": 1.5433743000030518, "kernel": 47.89597702026367}, "q_proj": {"bias": 2.520129680633545, "kernel": 52.86738967895508}, "v_proj": 
{"bias": 0.34588170051574707, "kernel": 48.25390625}}, "feed_forward": {"intermediate_dense": {"bias": 1.621065378189087, "kernel": 99.4951171875}, "output_dense": {"bias": 0.8157577514648438, "kernel": 91.33027648925781}}, "final_layer_norm": {"bias": 1.797032356262207, "scale": 20.613189697265625}, "layer_norm": {"bias": 1.921250343322754, "scale": 23.96576690673828}}, "40": {"attention": {"k_proj": {"bias": 0.5847006440162659, "kernel": 42.58441162109375}, "out_proj": {"bias": 1.53633451461792, "kernel": 48.99122619628906}, "q_proj": {"bias": 2.0470333099365234, "kernel": 43.350059509277344}, "v_proj": {"bias": 0.44075465202331543, "kernel": 48.568878173828125}}, "feed_forward": {"intermediate_dense": {"bias": 1.7721619606018066, "kernel": 89.45103454589844}, "output_dense": {"bias": 1.0236999988555908, "kernel": 96.0992660522461}}, "final_layer_norm": {"bias": 1.799966812133789, "scale": 24.871023178100586}, "layer_norm": {"bias": 2.0770342350006104, "scale": 26.721298217773438}}, "41": {"attention": {"k_proj": {"bias": 1.6700019836425781, "kernel": 39.923744201660156}, "out_proj": {"bias": 1.298099398612976, "kernel": 50.55332946777344}, "q_proj": {"bias": 1.7261749505996704, "kernel": 40.68635559082031}, "v_proj": {"bias": 0.3966291546821594, "kernel": 49.504920959472656}}, "feed_forward": {"intermediate_dense": {"bias": 1.9124207496643066, "kernel": 86.245361328125}, "output_dense": {"bias": 1.0469214916229248, "kernel": 95.14845275878906}}, "final_layer_norm": {"bias": 2.298802614212036, "scale": 28.323535919189453}, "layer_norm": {"bias": 2.107060432434082, "scale": 28.51062774658203}}, "42": {"attention": {"k_proj": {"bias": 0.7963449954986572, "kernel": 36.71210479736328}, "out_proj": {"bias": 1.338565707206726, "kernel": 44.78729248046875}, "q_proj": {"bias": 1.5457658767700195, "kernel": 38.06196594238281}, "v_proj": {"bias": 0.5876978635787964, "kernel": 43.13445281982422}}, "feed_forward": {"intermediate_dense": {"bias": 1.6503515243530273, "kernel": 
85.23811340332031}, "output_dense": {"bias": 1.0994179248809814, "kernel": 93.35334777832031}}, "final_layer_norm": {"bias": 2.021554470062256, "scale": 29.623184204101562}, "layer_norm": {"bias": 1.5729947090148926, "scale": 27.38263702392578}}, "43": {"attention": {"k_proj": {"bias": 1.2092329263687134, "kernel": 33.23188018798828}, "out_proj": {"bias": 1.3312959671020508, "kernel": 41.18092346191406}, "q_proj": {"bias": 1.356087327003479, "kernel": 34.04554748535156}, "v_proj": {"bias": 0.5175312757492065, "kernel": 39.07632064819336}}, "feed_forward": {"intermediate_dense": {"bias": 1.6842585802078247, "kernel": 84.46446228027344}, "output_dense": {"bias": 0.8656268119812012, "kernel": 91.28285217285156}}, "final_layer_norm": {"bias": 1.9466145038604736, "scale": 31.840717315673828}, "layer_norm": {"bias": 1.6922515630722046, "scale": 25.534618377685547}}, "44": {"attention": {"k_proj": {"bias": 2.490459442138672, "kernel": 33.82369613647461}, "out_proj": {"bias": 1.0941083431243896, "kernel": 44.90596008300781}, "q_proj": {"bias": 1.2875206470489502, "kernel": 34.19591522216797}, "v_proj": {"bias": 0.3790741264820099, "kernel": 43.999244689941406}}, "feed_forward": {"intermediate_dense": {"bias": 1.7634968757629395, "kernel": 83.41915893554688}, "output_dense": {"bias": 0.8121882081031799, "kernel": 88.93473052978516}}, "final_layer_norm": {"bias": 1.9330906867980957, "scale": 34.01293182373047}, "layer_norm": {"bias": 1.5868189334869385, "scale": 25.552824020385742}}, "45": {"attention": {"k_proj": {"bias": 2.0480833053588867, "kernel": 33.66320037841797}, "out_proj": {"bias": 0.9800894260406494, "kernel": 48.50392532348633}, "q_proj": {"bias": 1.3665473461151123, "kernel": 33.8492431640625}, "v_proj": {"bias": 0.4303898215293884, "kernel": 48.66197204589844}}, "feed_forward": {"intermediate_dense": {"bias": 1.881667137145996, "kernel": 80.08865356445312}, "output_dense": {"bias": 0.94748854637146, "kernel": 84.32666778564453}}, "final_layer_norm": {"bias": 
1.6785303354263306, "scale": 32.72064971923828}, "layer_norm": {"bias": 1.5169761180877686, "scale": 24.069011688232422}}, "46": {"attention": {"k_proj": {"bias": 1.5384384393692017, "kernel": 34.843414306640625}, "out_proj": {"bias": 0.7449491024017334, "kernel": 50.93366241455078}, "q_proj": {"bias": 1.5333590507507324, "kernel": 34.964630126953125}, "v_proj": {"bias": 0.37132495641708374, "kernel": 51.68553161621094}}, "feed_forward": {"intermediate_dense": {"bias": 1.941842794418335, "kernel": 74.42733764648438}, "output_dense": {"bias": 1.1018041372299194, "kernel": 74.62886047363281}}, "final_layer_norm": {"bias": 1.6753082275390625, "scale": 28.232973098754883}, "layer_norm": {"bias": 1.3341909646987915, "scale": 22.984222412109375}}, "47": {"attention": {"k_proj": {"bias": 0.2589734196662903, "kernel": 37.107086181640625}, "out_proj": {"bias": 0.6299062967300415, "kernel": 45.20429992675781}, "q_proj": {"bias": 1.651952862739563, "kernel": 37.7532958984375}, "v_proj": {"bias": 0.3462907671928406, "kernel": 46.18851852416992}}, "feed_forward": {"intermediate_dense": {"bias": 1.9934666156768799, "kernel": 71.76575469970703}, "output_dense": {"bias": 0.605868935585022, "kernel": 68.13175201416016}}, "final_layer_norm": {"bias": 1.5152955055236816, "scale": 23.07292366027832}, "layer_norm": {"bias": 1.0596134662628174, "scale": 20.234088897705078}}, "5": {"attention": {"k_proj": {"bias": 0.018954617902636528, "kernel": 48.02461242675781}, "out_proj": {"bias": 1.5274896621704102, "kernel": 49.122745513916016}, "q_proj": {"bias": 2.616206645965576, "kernel": 48.16704177856445}, "v_proj": {"bias": 0.30917540192604065, "kernel": 49.918846130371094}}, "feed_forward": {"intermediate_dense": {"bias": 1.5450689792633057, "kernel": 99.6058120727539}, "output_dense": {"bias": 0.845067024230957, "kernel": 90.62794494628906}}, "final_layer_norm": {"bias": 2.0742123126983643, "scale": 20.826757431030273}, "layer_norm": {"bias": 1.9509385824203491, "scale": 
23.38003921508789}}, "6": {"attention": {"k_proj": {"bias": 0.20099012553691864, "kernel": 49.64418411254883}, "out_proj": {"bias": 1.5177661180496216, "kernel": 48.44267272949219}, "q_proj": {"bias": 2.6649043560028076, "kernel": 50.12384033203125}, "v_proj": {"bias": 0.3115385174751282, "kernel": 48.973114013671875}}, "feed_forward": {"intermediate_dense": {"bias": 1.5237888097763062, "kernel": 98.6987075805664}, "output_dense": {"bias": 0.6965006589889526, "kernel": 90.21868133544922}}, "final_layer_norm": {"bias": 2.374725818634033, "scale": 20.303024291992188}, "layer_norm": {"bias": 1.9539954662322998, "scale": 23.747432708740234}}, "7": {"attention": {"k_proj": {"bias": 0.19492888450622559, "kernel": 49.445472717285156}, "out_proj": {"bias": 1.3328473567962646, "kernel": 48.69258117675781}, "q_proj": {"bias": 2.440291404724121, "kernel": 49.83523941040039}, "v_proj": {"bias": 0.39602091908454895, "kernel": 48.65628433227539}}, "feed_forward": {"intermediate_dense": {"bias": 1.5299232006072998, "kernel": 98.44473266601562}, "output_dense": {"bias": 0.5384174585342407, "kernel": 89.95935821533203}}, "final_layer_norm": {"bias": 2.2129130363464355, "scale": 20.541099548339844}, "layer_norm": {"bias": 1.8585599660873413, "scale": 22.472408294677734}}, "8": {"attention": {"k_proj": {"bias": 0.17308779060840607, "kernel": 48.946983337402344}, "out_proj": {"bias": 1.1581041812896729, "kernel": 49.24256896972656}, "q_proj": {"bias": 2.4154317378997803, "kernel": 48.71516418457031}, "v_proj": {"bias": 0.32466843724250793, "kernel": 49.42582702636719}}, "feed_forward": {"intermediate_dense": {"bias": 1.5838823318481445, "kernel": 98.05018615722656}, "output_dense": {"bias": 0.49450039863586426, "kernel": 89.38335418701172}}, "final_layer_norm": {"bias": 2.1680216789245605, "scale": 20.328733444213867}, "layer_norm": {"bias": 1.7943463325500488, "scale": 22.934885025024414}}, "9": {"attention": {"k_proj": {"bias": 0.20712265372276306, "kernel": 49.56227111816406}, 
"out_proj": {"bias": 1.3597404956817627, "kernel": 50.03252410888672}, "q_proj": {"bias": 2.3744239807128906, "kernel": 49.721893310546875}, "v_proj": {"bias": 0.33329081535339355, "kernel": 50.44175338745117}}, "feed_forward": {"intermediate_dense": {"bias": 1.665709376335144, "kernel": 96.65989685058594}, "output_dense": {"bias": 0.6352252960205078, "kernel": 89.92665100097656}}, "final_layer_norm": {"bias": 2.058505058288574, "scale": 19.611835479736328}, "layer_norm": {"bias": 1.8844467401504517, "scale": 24.294519424438477}}}, "pos_conv_embed": {"conv": {"bias": 5.548188209533691, "weight_g": 8.812222480773926, "weight_v": 84.63363647460938}}}, "feature_extractor": {"conv_layers": {"0": {"conv": {"bias": 2.0290679931640625, "kernel": 20.55536460876465}, "layer_norm": {"bias": 4.550922393798828, "scale": 16.167570114135742}}, "1": {"conv": {"bias": 1.7790228128433228, "kernel": 51.24136734008789}, "layer_norm": {"bias": 5.962646961212158, "scale": 23.268157958984375}}, "2": {"conv": {"bias": 1.140576720237732, "kernel": 46.50312042236328}, "layer_norm": {"bias": 4.176670551300049, "scale": 20.370853424072266}}, "3": {"conv": {"bias": 0.6725863218307495, "kernel": 44.397525787353516}, "layer_norm": {"bias": 3.888174533843994, "scale": 17.53795051574707}}, "4": {"conv": {"bias": 0.6373162269592285, "kernel": 41.314056396484375}, "layer_norm": {"bias": 2.385471820831299, "scale": 16.34571647644043}}, "5": {"conv": {"bias": 0.5147221684455872, "kernel": 37.479759216308594}, "layer_norm": {"bias": 2.020900011062622, "scale": 17.064470291137695}}, "6": {"conv": {"bias": 0.4947893023490906, "kernel": 40.64780044555664}, "layer_norm": {"bias": 0.5876954793930054, "scale": 19.058603286743164}}}}, "feature_projection": {"layer_norm": {"bias": 6.323210716247559, "scale": 16.550113677978516}, "projection": {"bias": 1.6564881801605225, "kernel": 34.68690490722656}}, "masked_spec_embed": 11.914372444152832}}, "train/learning_rate": 2.497500099707395e-05, "train/loss": 
1.0381834506988525, "train/param_norm": 1186.01953125, "_runtime": 6204, "_timestamp": 1659195568, "_step": 1000, "_wandb": {"runtime": 6205}} \ No newline at end of file diff --git a/wandb/run-20220730_135604-y1b5rbiq/logs/debug-internal.log b/wandb/run-20220730_135604-y1b5rbiq/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..37892c0e7c9bab0ce2bdb4f9a1a97b3c8080bc6f --- /dev/null +++ b/wandb/run-20220730_135604-y1b5rbiq/logs/debug-internal.log @@ -0,0 +1,2398 @@ +2022-07-30 13:56:05,186 INFO MainThread:2277729 [internal.py:wandb_internal():87] W&B internal server running at pid: 2277729, started at: 2022-07-30 13:56:05.186478 +2022-07-30 13:56:05,188 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: check_version +2022-07-30 13:56:05,188 INFO WriterThread:2277729 [datastore.py:open_for_write():77] open: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/run-y1b5rbiq.wandb +2022-07-30 13:56:05,189 DEBUG SenderThread:2277729 [sender.py:send():234] send: header +2022-07-30 13:56:05,189 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: check_version +2022-07-30 13:56:05,227 DEBUG SenderThread:2277729 [sender.py:send():234] send: run +2022-07-30 13:56:05,409 INFO SenderThread:2277729 [dir_watcher.py:__init__():169] watching files in: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files +2022-07-30 13:56:05,409 INFO SenderThread:2277729 [sender.py:_start_run_threads():804] run started: y1b5rbiq with start time 1659189364 +2022-07-30 13:56:05,409 DEBUG SenderThread:2277729 [sender.py:send():234] send: summary +2022-07-30 13:56:05,409 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: run_start +2022-07-30 13:56:05,410 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 13:56:06,413 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_created():217] file/dir created: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json +2022-07-30 13:56:07,787 DEBUG HandlerThread:2277729 [meta.py:__init__():40] meta init +2022-07-30 13:56:07,788 DEBUG HandlerThread:2277729 [meta.py:__init__():54] meta init done +2022-07-30 13:56:07,788 DEBUG HandlerThread:2277729 [meta.py:probe():214] probe +2022-07-30 13:56:07,789 DEBUG HandlerThread:2277729 [meta.py:_setup_git():204] setup git +2022-07-30 13:56:07,831 DEBUG HandlerThread:2277729 [meta.py:_setup_git():211] setup git done +2022-07-30 13:56:07,831 DEBUG HandlerThread:2277729 [meta.py:_save_code():92] save code +2022-07-30 13:56:07,844 DEBUG HandlerThread:2277729 [meta.py:_save_code():113] save code done +2022-07-30 13:56:07,845 DEBUG HandlerThread:2277729 [meta.py:_save_patches():130] save patches +2022-07-30 13:56:07,921 DEBUG HandlerThread:2277729 [meta.py:_save_patches():172] save patches done +2022-07-30 13:56:07,922 DEBUG HandlerThread:2277729 [meta.py:_save_pip():58] save pip +2022-07-30 13:56:07,922 DEBUG HandlerThread:2277729 [meta.py:_save_pip():72] save pip done +2022-07-30 13:56:07,922 DEBUG HandlerThread:2277729 [meta.py:probe():252] probe done +2022-07-30 13:56:07,926 DEBUG SenderThread:2277729 [sender.py:send():234] send: files +2022-07-30 13:56:07,926 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-metadata.json with policy now +2022-07-30 13:56:07,926 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file code/run_flax_speech_recognition_ctc.py with policy now +2022-07-30 13:56:07,927 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file diff.patch with policy now +2022-07-30 13:56:07,933 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:56:07,933 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:56:08,396 INFO Thread-11 :2277729 [upload_job.py:push():137] Uploaded file 
/tmp/tmp17z1awz3wandb/3e9bk1i5-wandb-metadata.json +2022-07-30 13:56:08,418 INFO Thread-13 :2277729 [upload_job.py:push():137] Uploaded file /tmp/tmp17z1awz3wandb/3h7xlesk-diff.patch +2022-07-30 13:56:08,421 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-metadata.json +2022-07-30 13:56:08,421 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/code/run_flax_speech_recognition_ctc.py +2022-07-30 13:56:08,421 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/requirements.txt +2022-07-30 13:56:08,422 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/diff.patch +2022-07-30 13:56:08,422 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:56:08,422 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_created():217] file/dir created: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/code +2022-07-30 13:56:08,624 INFO Thread-12 :2277729 [upload_job.py:push():137] Uploaded file /tmp/tmp17z1awz3wandb/2c1tu24r-code/run_flax_speech_recognition_ctc.py +2022-07-30 13:56:10,422 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:56:12,423 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:56:14,424 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:56:16,425 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:56:22,428 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:56:23,070 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:56:23,070 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:56:24,429 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:56:35,871 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 13:56:38,212 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:56:38,213 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:56:38,435 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:56:40,436 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:56:51,441 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:56:53,346 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:56:53,346 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:56:53,442 INFO Thread-8 
:2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:57:05,946 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 13:57:07,448 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:57:08,479 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:57:08,479 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:57:23,631 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:57:23,631 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:57:36,022 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 13:57:38,782 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:57:38,782 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:57:48,464 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:57:50,465 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:57:52,466 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:57:53,977 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:57:53,977 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:57:54,467 INFO 
Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:57:56,468 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:57:58,469 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:00,470 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:03,472 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:05,473 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:06,098 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 13:58:07,474 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:09,401 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:58:09,401 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:58:09,475 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:11,475 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:13,476 INFO 
Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:15,477 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:17,478 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:19,479 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:21,480 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:23,481 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:24,586 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:58:24,586 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:58:25,482 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:27,483 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:29,484 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:31,485 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:33,486 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:35,487 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:36,169 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 13:58:37,488 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:39,489 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:39,731 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:58:39,731 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:58:41,490 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:43,491 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:45,492 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:47,493 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:49,494 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:51,495 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:53,496 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:54,878 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:58:54,878 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:58:55,498 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:57,498 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:58:59,499 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:01,501 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:03,502 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:05,503 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:06,239 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 13:59:07,504 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:09,505 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:10,015 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:59:10,016 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:59:11,506 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:13,507 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:15,510 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:17,511 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:19,512 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:21,513 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:23,514 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:25,178 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:59:25,178 DEBUG SenderThread:2277729 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 13:59:25,515 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:27,516 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:29,517 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:31,518 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:33,519 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:35,520 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:36,321 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 13:59:37,521 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:39,522 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:40,357 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:59:40,358 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:59:41,523 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:43,524 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:45,525 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:47,529 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:49,530 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:51,531 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:53,532 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:55,509 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 13:59:55,510 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 13:59:55,534 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:57,534 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 13:59:59,535 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
14:00:01,536 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:03,537 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:05,539 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:06,406 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:00:07,540 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:09,541 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:10,645 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:00:10,645 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:00:11,542 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:13,543 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:16,544 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:18,546 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
14:00:20,547 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:22,553 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:24,554 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:25,853 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:00:25,854 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:00:26,556 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:28,557 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:30,558 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:32,559 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:34,561 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:36,479 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:00:36,562 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
14:00:38,563 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:40,564 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:41,000 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:00:41,000 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:00:42,565 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:44,566 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:46,568 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:48,569 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:50,570 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:52,571 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:54,572 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:56,140 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 14:00:56,140 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:00:56,573 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:00:58,574 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:00,575 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:02,576 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:04,577 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:06,561 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:01:06,579 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:08,580 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:10,581 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:11,282 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:01:11,283 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:01:12,583 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:14,584 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:16,586 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:18,586 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:20,587 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:22,588 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:24,589 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:26,431 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:01:26,431 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:01:26,590 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:28,591 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:30,592 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:32,593 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:34,595 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:36,596 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:36,639 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:01:38,597 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:40,598 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:41,578 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:01:41,579 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:01:42,599 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:44,600 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:46,601 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:48,602 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:50,603 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:52,604 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:54,605 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:56,606 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:01:56,745 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:01:56,745 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:01:58,607 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:00,608 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:02,609 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:04,611 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:06,612 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
14:02:06,718 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:02:08,613 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:10,614 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:11,896 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:02:11,896 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:02:12,615 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:14,616 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:16,617 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:18,618 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:20,619 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:22,620 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:24,621 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
14:02:26,622 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:27,039 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:02:27,039 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:02:28,623 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:30,624 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:32,625 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:34,626 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:36,627 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:36,797 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:02:38,628 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:40,629 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:42,178 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:02:42,179 DEBUG SenderThread:2277729 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 14:02:42,630 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:44,631 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:46,632 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:48,633 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:50,634 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:52,635 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:54,636 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:56,637 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:02:57,337 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:02:57,339 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:02:58,639 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:00,639 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:02,640 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:04,641 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:06,642 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:06,872 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:03:08,643 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:10,644 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:12,486 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:03:12,486 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:03:12,645 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:14,647 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:16,648 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:18,649 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:20,650 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:22,651 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:27,688 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:03:27,689 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:03:36,945 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:03:42,825 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:03:42,825 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:03:53,663 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:55,664 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:57,665 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:03:57,961 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:03:57,962 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:03:59,666 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:01,667 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:03,668 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:05,669 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:07,029 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:04:07,670 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:09,671 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:11,675 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:13,096 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:04:13,097 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:04:13,676 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:15,677 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:17,678 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:19,679 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:21,680 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:23,681 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:25,682 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:27,683 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:28,234 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:04:28,234 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:04:29,684 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:31,685 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:33,686 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:04:37,105 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:04:43,373 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 14:04:43,373 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:04:58,507 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:04:58,508 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:05:07,182 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:05:07,701 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:09,702 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:11,703 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:13,652 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:05:13,652 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:05:13,703 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:15,704 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:17,705 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:19,706 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:21,708 INFO 
Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:23,709 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:25,709 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:27,710 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:28,792 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:05:28,793 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:05:30,712 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:32,713 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:34,713 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:36,715 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:37,252 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:05:40,717 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:42,718 INFO 
Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:43,951 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:05:43,952 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:05:44,719 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:46,720 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:48,721 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:50,722 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:52,723 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:54,723 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:56,724 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:58,725 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:05:59,091 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status 
+2022-07-30 14:05:59,092 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:06:00,726 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:02,727 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:04,728 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:06,729 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:07,337 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:06:08,730 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:10,732 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:12,734 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:14,247 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:06:14,248 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:06:14,734 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:16,735 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:18,736 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:20,739 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:22,739 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:24,740 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:26,740 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:28,742 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:06:29,397 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:06:29,397 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:06:37,413 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:06:44,536 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:06:44,537 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:06:59,669 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:06:59,670 DEBUG SenderThread:2277729 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 14:07:07,490 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:07:10,761 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:07:14,981 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:07:14,981 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:07:19,765 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:07:25,767 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:07:30,164 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:07:30,164 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:07:31,770 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:07:37,560 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:07:37,773 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:07:44,776 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:07:45,305 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:07:45,305 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status 
+2022-07-30 14:07:46,777 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:07:50,778 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:07:54,780 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:07:56,781 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:08:00,442 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:08:00,442 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:08:04,785 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:08:07,629 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:08:15,594 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:08:15,594 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:08:30,756 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:08:30,756 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:08:33,796 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:08:37,702 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:08:41,800 INFO 
Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:08:43,801 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:08:45,933 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:08:45,933 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:09:01,096 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:09:01,096 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:09:07,774 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:09:16,245 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:09:16,245 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:09:31,388 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:09:31,389 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:09:37,845 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:09:46,522 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:09:46,523 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:10:01,658 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:10:01,658 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:10:07,919 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:10:16,795 DEBUG 
HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:10:16,795 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:10:31,933 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:10:31,933 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:10:37,998 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:10:47,069 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:10:47,069 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:11:02,209 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:11:02,210 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:11:08,072 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:11:17,348 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:11:17,348 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:11:32,498 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:11:32,499 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:11:38,151 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:11:47,630 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:11:47,630 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:12:02,800 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:12:02,800 DEBUG SenderThread:2277729 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 14:12:08,343 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:12:17,934 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:12:17,935 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:12:24,907 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:12:33,098 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:12:33,099 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:12:38,420 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:12:48,261 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:12:48,262 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:13:03,672 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:13:03,672 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:13:08,494 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:13:18,830 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:13:18,830 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:13:33,977 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:13:33,977 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:13:38,572 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:13:49,120 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] 
handle_request: stop_status +2022-07-30 14:13:49,120 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:14:04,252 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:14:04,253 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:14:08,648 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:14:19,390 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:14:19,390 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:14:34,525 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:14:34,525 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:14:38,722 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:14:49,658 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:14:49,658 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:15:04,815 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:15:04,815 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:15:08,800 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:15:19,946 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:15:19,946 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:15:35,085 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:15:35,086 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:15:38,877 DEBUG 
SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:15:50,224 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:15:50,224 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:16:05,364 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:16:05,365 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:16:08,949 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:16:20,501 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:16:20,501 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:16:35,638 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:16:35,638 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:16:39,026 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:16:48,017 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:16:50,838 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:16:50,839 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:17:06,045 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:17:06,045 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:17:09,100 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:17:21,243 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:17:21,243 DEBUG 
SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:17:36,401 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:17:36,402 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:17:39,173 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:17:51,549 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:17:51,550 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:18:06,684 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:18:06,684 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:18:09,249 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:18:21,820 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:18:21,821 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:18:36,960 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:18:36,960 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:18:39,326 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:18:52,093 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:18:52,093 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:19:07,239 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:19:07,240 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:19:09,400 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats 
+2022-07-30 14:19:22,441 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:19:22,441 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:19:37,586 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:19:37,587 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:19:39,466 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:19:52,726 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:19:52,727 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:20:07,858 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:20:07,859 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:20:09,534 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:20:22,993 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:20:22,994 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:20:38,130 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:20:38,130 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:20:39,629 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:20:53,278 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:20:53,278 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:21:08,423 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:21:08,423 DEBUG SenderThread:2277729 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 14:21:09,734 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:21:16,129 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:21:23,574 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:21:23,574 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:21:38,737 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:21:38,738 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:21:39,804 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:21:53,901 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:21:53,902 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:22:09,073 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:22:09,073 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:22:09,872 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:22:24,234 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:22:24,235 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:22:39,371 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:22:39,371 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:22:39,935 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:22:54,505 DEBUG HandlerThread:2277729 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:22:54,505 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:23:09,643 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:23:09,643 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:23:09,999 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:23:24,778 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:23:24,778 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:23:39,924 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:23:39,924 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:23:40,068 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:23:55,059 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:23:55,059 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:24:10,143 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:24:10,193 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:24:10,193 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:24:25,327 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:24:25,327 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:24:40,218 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:24:40,461 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 
14:24:40,462 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:24:55,601 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:24:55,602 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:25:10,310 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:25:10,737 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:25:10,738 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:25:25,886 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:25:25,887 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:25:40,387 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:25:41,746 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:25:41,746 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:25:43,250 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:25:47,252 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:25:53,254 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:25:56,987 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:25:56,987 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:25:58,257 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:26:04,259 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:26:10,454 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:26:12,307 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:26:12,307 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:26:27,768 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:26:27,768 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:26:40,524 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:26:42,936 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:26:42,936 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:26:58,101 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:26:58,101 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:27:10,594 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:27:13,245 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:27:13,246 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:27:28,379 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:27:28,379 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:27:40,665 DEBUG SenderThread:2277729 
[sender.py:send():234] send: stats +2022-07-30 14:27:43,518 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:27:43,518 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:27:58,654 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:27:58,655 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:28:10,735 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:28:13,818 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:28:13,818 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:28:28,953 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:28:28,953 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:28:40,809 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:28:44,095 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:28:44,096 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:28:59,232 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:28:59,233 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:29:10,882 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:29:14,366 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:29:14,367 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:29:29,499 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 
14:29:29,499 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:29:40,958 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:29:44,666 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:29:44,667 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:29:59,804 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:29:59,805 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:30:11,042 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:30:14,949 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:30:14,950 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:30:25,365 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:30:29,366 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:30:30,225 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:30:30,226 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:30:35,369 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:30:39,370 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:30:41,121 DEBUG SenderThread:2277729 [sender.py:send():234] send: 
stats +2022-07-30 14:30:43,372 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:30:45,590 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:30:45,590 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:30:47,374 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:30:51,375 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:30:56,378 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:30:58,379 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:31:00,911 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:31:00,911 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:31:02,380 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:31:11,194 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:31:16,224 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:31:16,225 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:31:31,380 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 14:31:31,380 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:31:41,269 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:31:46,552 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:31:46,553 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:32:01,698 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:32:01,698 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:32:11,345 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:32:16,831 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:32:16,832 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:32:31,965 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:32:31,965 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:32:41,420 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:32:47,101 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:32:47,102 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:33:02,236 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:33:02,236 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:33:11,495 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:33:17,376 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:33:17,376 DEBUG SenderThread:2277729 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 14:33:32,510 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:33:32,510 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:33:41,569 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:33:47,652 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:33:47,652 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:34:02,782 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:34:02,782 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:34:11,645 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:34:17,915 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:34:17,916 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:34:33,062 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:34:33,062 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:34:41,717 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:34:48,201 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:34:48,201 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:35:03,335 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:35:03,336 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:35:11,801 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:35:18,469 
DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:35:18,469 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:35:33,487 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:35:33,709 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:35:33,709 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:35:37,489 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:35:40,490 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:35:41,881 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:35:44,492 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:35:48,494 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:35:48,998 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:35:48,998 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:35:50,495 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:35:54,497 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:35:56,498 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:35:58,499 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:36:02,500 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:36:04,253 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:36:04,253 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:36:04,501 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:36:06,502 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:36:10,504 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:36:11,952 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:36:19,604 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:36:19,604 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:36:34,765 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:36:34,765 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:36:42,025 DEBUG 
SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:36:49,926 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:36:49,927 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:37:05,074 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:37:05,074 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:37:12,097 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:37:20,211 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:37:20,211 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:37:35,343 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:37:35,344 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:37:42,174 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:37:50,476 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:37:50,476 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:38:05,614 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:38:05,615 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:38:12,248 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:38:20,748 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:38:20,749 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:38:35,886 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status 
+2022-07-30 14:38:35,886 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:38:42,325 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:38:51,015 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:38:51,016 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:39:06,154 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:39:06,154 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:39:12,400 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:39:21,670 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:39:21,671 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:39:36,813 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:39:36,814 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:39:42,474 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:39:51,956 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:39:51,956 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:40:07,098 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:40:07,099 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:40:12,553 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:40:22,240 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:40:22,240 DEBUG SenderThread:2277729 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 14:40:26,611 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:40:31,612 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:40:33,613 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:40:35,614 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:40:37,427 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:40:37,427 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:40:37,615 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:40:39,616 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:40:41,617 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:40:42,625 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:40:43,618 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:40:45,619 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:40:47,620 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:40:49,621 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:40:51,622 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:40:52,614 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:40:52,615 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:41:07,797 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:41:07,797 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:41:12,701 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:41:22,967 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:41:22,967 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:41:38,136 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:41:38,136 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:41:42,775 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:41:53,278 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:41:53,278 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:42:08,415 DEBUG HandlerThread:2277729 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:42:08,415 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:42:12,853 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:42:23,548 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:42:23,548 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:42:38,684 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:42:38,685 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:42:42,930 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:42:53,846 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:42:53,847 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:43:08,983 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:43:08,983 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:43:13,006 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:43:24,120 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:43:24,121 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:43:39,257 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:43:39,257 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:43:43,081 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:43:54,415 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 
14:43:54,415 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:44:09,551 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:44:09,552 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:44:13,159 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:44:24,699 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:44:24,699 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:44:39,837 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:44:39,838 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:44:43,231 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:44:54,971 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:44:54,971 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:45:01,720 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:45:03,721 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:45:06,722 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:45:10,702 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:45:10,702 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:45:13,304 DEBUG 
SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:45:16,726 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:45:24,729 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:45:26,133 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:45:26,133 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:45:30,732 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:45:38,735 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:45:41,402 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:45:41,403 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:45:42,736 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:45:43,376 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:45:49,739 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:45:55,741 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:45:56,757 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] 
handle_request: stop_status +2022-07-30 14:45:56,758 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:45:59,743 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:05,746 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:09,747 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:12,002 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:46:12,003 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:46:13,442 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:46:15,750 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:19,751 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:24,754 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:27,234 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:46:27,234 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:46:28,755 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
14:46:32,757 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:36,759 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:40,761 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:42,592 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:46:42,593 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:46:43,509 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:46:44,762 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:48,764 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:53,766 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:55,767 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:46:57,810 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:46:57,810 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:46:59,769 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:03,770 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:05,771 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:09,773 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:13,100 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:47:13,100 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:47:13,578 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:47:13,775 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:15,776 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:19,777 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:21,778 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:23,779 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:27,781 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:28,297 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:47:28,297 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:47:29,782 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:34,784 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:36,784 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:38,785 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:40,786 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:42,787 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:43,506 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:47:43,506 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:47:43,650 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:47:44,788 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:46,789 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:48,790 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:50,791 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:52,792 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:54,793 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:56,794 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:47:58,671 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:47:58,671 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:47:58,794 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:48:00,795 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:48:01,796 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:48:02,796 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:48:13,721 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:48:13,800 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:48:13,955 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:48:13,955 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:48:19,803 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:48:27,806 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:48:29,259 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:48:29,259 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:48:33,811 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:48:39,813 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:48:43,793 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:48:44,548 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:48:44,548 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:48:46,816 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:48:50,818 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:48:56,821 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:48:59,831 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:48:59,832 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:49:00,823 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:04,825 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:10,827 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:13,866 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:49:14,829 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:15,610 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:49:15,611 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:49:18,831 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:22,833 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:27,835 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:30,935 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:49:30,935 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:49:31,837 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:35,838 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:39,840 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:43,841 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:43,941 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:49:46,133 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:49:46,133 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:49:49,844 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:53,846 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
14:49:57,848 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:49:59,849 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:01,321 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:50:01,322 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:50:03,850 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:08,852 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:10,853 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:14,012 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:50:14,855 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:16,594 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:50:16,594 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:50:16,856 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:20,858 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:22,859 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:24,859 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:26,860 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:30,862 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:31,864 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:50:31,864 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:50:32,863 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:34,864 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:36,865 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:38,866 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:40,867 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
14:50:42,868 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:44,086 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:50:44,869 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:46,869 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:47,060 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:50:47,060 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:50:48,870 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:51,872 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:53,873 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:55,873 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:57,874 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:50:59,875 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
14:51:01,876 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:51:02,375 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:51:02,375 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:51:10,880 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:51:14,159 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:51:17,861 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:51:17,861 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:51:18,884 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:51:24,886 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:51:33,068 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:51:33,069 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:51:34,890 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:51:40,893 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:51:44,231 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:51:46,895 INFO Thread-8 
:2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:51:48,267 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:51:48,267 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:51:51,898 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:51:57,900 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:03,544 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:52:03,544 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:52:05,903 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:09,905 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:13,907 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:14,307 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:52:17,908 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:18,815 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:52:18,815 DEBUG SenderThread:2277729 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 14:52:21,910 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:25,912 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:29,913 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:34,127 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:52:34,127 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:52:34,915 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:38,917 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:42,919 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:44,382 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:52:46,921 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:49,399 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:52:49,400 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:52:50,923 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:54,925 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:52:56,925 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:00,927 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:02,928 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:04,675 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:53:04,675 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:53:06,930 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:09,932 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:13,933 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:14,456 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:53:15,934 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:19,879 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 14:53:19,880 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:53:19,936 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:24,938 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:26,939 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:28,940 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:32,942 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:34,943 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:35,075 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:53:35,076 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:53:36,944 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:38,945 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:40,946 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:42,947 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:44,532 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:53:44,948 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:46,949 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:48,950 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:50,284 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:53:50,285 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:53:50,951 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:52,952 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:54,952 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:56,953 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:58,954 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:53:59,864 DEBUG SenderThread:2277729 [sender.py:send():234] send: history +2022-07-30 14:53:59,868 DEBUG SenderThread:2277729 [sender.py:send():234] send: summary +2022-07-30 14:53:59,872 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 14:53:59,955 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json +2022-07-30 14:54:00,955 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:54:01,956 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:54:05,540 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:54:05,541 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:54:11,960 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:54:14,610 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:54:17,963 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:54:20,988 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:54:20,989 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:54:25,966 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:54:31,968 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:54:36,301 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:54:36,301 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:54:38,971 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:54:42,973 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:54:44,686 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:54:48,976 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:54:51,565 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:54:51,566 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:54:52,977 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:54:58,980 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:02,982 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:06,891 DEBUG HandlerThread:2277729 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:55:06,892 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:55:07,984 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:13,987 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:14,761 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:55:17,989 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:21,991 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:22,184 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:55:22,185 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:55:23,992 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:27,993 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:31,995 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:35,997 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
14:55:37,439 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:55:37,439 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:55:39,999 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:44,001 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:44,836 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:55:49,003 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:51,004 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:52,781 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:55:52,782 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:55:55,005 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:55:57,006 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:01,008 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:03,009 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:07,011 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:08,034 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:56:08,034 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:56:09,012 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:13,014 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:14,910 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:56:15,015 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:17,015 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:21,017 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:23,245 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:56:23,245 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:56:24,018 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:26,019 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:28,020 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:30,021 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:32,022 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:34,023 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:36,024 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:38,025 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:38,430 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:56:38,431 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:56:40,026 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:42,027 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:44,028 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:44,987 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:56:46,029 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:48,030 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:50,031 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:56:53,725 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:56:53,725 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:57:02,035 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:57:09,039 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:57:09,147 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:57:09,147 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:57:15,066 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:57:17,042 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:57:23,044 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:57:24,449 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:57:24,450 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:57:29,047 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:57:34,049 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:57:39,734 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:57:39,735 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:57:40,052 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:57:44,053 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:57:45,141 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:57:50,056 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:57:54,058 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:57:55,077 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:57:55,077 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:57:59,060 INFO Thread-8 
:2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:03,062 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:07,064 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:10,366 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:58:10,366 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:58:11,066 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:15,068 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:15,221 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:58:19,069 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:23,071 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:25,640 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:58:25,640 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:58:27,073 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log 
+2022-07-30 14:58:31,075 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:36,077 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:38,078 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:40,876 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:58:40,876 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:58:42,080 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:45,297 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:58:46,082 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:48,083 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:52,085 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:54,086 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:58:56,105 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:58:56,105 DEBUG SenderThread:2277729 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 14:58:58,088 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:00,088 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:02,089 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:06,091 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:08,092 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:10,093 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:11,382 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:59:11,382 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:59:12,094 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:15,096 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:15,370 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:59:17,097 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:19,098 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:21,099 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:23,100 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:25,101 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:26,589 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:59:26,589 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:59:27,102 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:29,103 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:31,104 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:33,105 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:35,106 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
14:59:37,107 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:39,108 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:39,561 DEBUG SenderThread:2277729 [sender.py:send():234] send: history +2022-07-30 14:59:39,564 DEBUG SenderThread:2277729 [sender.py:send():234] send: summary +2022-07-30 14:59:39,567 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 14:59:40,109 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json +2022-07-30 14:59:41,109 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:41,773 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:59:41,773 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 14:59:45,446 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 14:59:51,113 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 14:59:57,156 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 14:59:57,156 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:00:02,118 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:00:12,123 INFO Thread-8 
:2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:00:12,442 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:00:12,443 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:00:15,517 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:00:18,125 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:00:27,897 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:00:27,897 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:00:28,130 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:00:34,133 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:00:38,134 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:00:42,136 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:00:43,207 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:00:43,207 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:00:45,585 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:00:49,139 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:00:57,143 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:00:58,493 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:00:58,494 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:01:01,145 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:05,147 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:09,148 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:13,150 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:13,789 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:01:13,790 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:01:15,657 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:01:20,153 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:24,155 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
15:01:27,156 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:29,080 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:01:29,081 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:01:31,158 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:37,161 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:41,163 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:44,321 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:01:44,321 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:01:45,165 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:45,736 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:01:49,166 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:51,167 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:55,169 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:57,174 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:01:59,557 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:01:59,557 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:02:01,175 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:05,177 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:07,178 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:10,179 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:12,180 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:14,825 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:02:14,826 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:02:15,834 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:02:16,182 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:18,183 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:22,185 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:24,186 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:26,187 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:28,188 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:30,059 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:02:30,060 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:02:30,189 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:32,190 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:34,191 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:36,192 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:38,193 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:40,194 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:42,195 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:44,196 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:45,221 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:02:45,221 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:02:45,922 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:02:50,198 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:02:53,200 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:03:00,663 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:03:00,663 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:03:01,203 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:03:09,206 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:03:15,209 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:03:15,997 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:03:16,048 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:03:16,048 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:03:21,211 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:03:28,214 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:03:31,294 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:03:31,295 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:03:34,217 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:03:38,219 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:03:44,222 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:03:46,072 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:03:46,536 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:03:46,536 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:03:48,223 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:03:52,225 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:03:59,229 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:01,717 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:04:01,717 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:04:03,230 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:07,232 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:11,234 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:15,236 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:16,147 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:04:17,008 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:04:17,008 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:04:19,238 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
15:04:21,239 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:25,241 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:30,243 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:32,278 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:04:32,278 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:04:34,245 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:38,246 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:40,247 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:44,249 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:46,222 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:04:46,250 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:47,486 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:04:47,486 DEBUG SenderThread:2277729 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 15:04:50,252 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:52,252 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:56,254 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:04:58,255 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:02,257 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:02,667 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:05:02,667 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:05:04,258 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:07,259 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:09,260 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:11,261 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:15,263 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:16,300 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:05:17,264 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:17,882 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:05:17,882 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:05:19,265 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:21,266 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:23,267 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:25,267 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:27,268 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:29,269 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:31,270 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:33,034 DEBUG HandlerThread:2277729 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:05:33,035 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:05:33,271 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:35,272 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:37,273 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:37,919 DEBUG SenderThread:2277729 [sender.py:send():234] send: history +2022-07-30 15:05:37,923 DEBUG SenderThread:2277729 [sender.py:send():234] send: summary +2022-07-30 15:05:37,926 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 15:05:38,274 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json +2022-07-30 15:05:39,274 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:46,373 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:05:48,372 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:05:48,373 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:05:49,278 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:05:56,281 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:06:02,284 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:06:03,721 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:06:03,722 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:06:08,287 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:06:14,289 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:06:16,448 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:06:19,023 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:06:19,023 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:06:20,292 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:06:24,294 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:06:31,297 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:06:34,268 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:06:34,269 DEBUG SenderThread:2277729 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 15:06:35,299 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:06:41,301 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:06:45,303 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:06:46,522 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:06:49,305 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:06:49,515 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:06:49,515 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:06:53,307 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:06:57,309 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:02,310 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:04,832 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:07:04,833 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:07:06,312 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:10,314 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:14,316 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:16,317 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:16,595 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:07:20,076 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:07:20,077 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:07:20,319 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:24,320 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:26,321 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:30,323 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:32,324 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:35,982 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 15:07:35,982 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:07:37,326 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:39,327 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:43,329 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:45,330 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:46,670 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:07:49,332 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:51,168 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:07:51,168 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:07:51,333 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:53,334 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:55,334 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:07:59,336 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:01,337 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:03,338 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:05,339 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:06,355 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:08:06,355 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:08:07,340 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:09,341 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:11,342 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:13,343 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:15,344 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:16,746 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:08:17,345 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:19,346 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:21,347 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:21,575 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:08:21,576 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:08:23,348 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:25,349 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:36,864 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:08:36,864 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:08:37,354 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:43,357 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:46,823 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:08:52,347 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:08:52,347 DEBUG SenderThread:2277729 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 15:08:52,361 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:08:58,363 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:02,365 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:07,646 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:09:07,647 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:09:08,368 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:12,369 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:16,900 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:09:19,372 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:22,941 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:09:22,941 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:09:23,374 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:27,376 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:33,379 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:37,381 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:38,261 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:09:38,261 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:09:41,382 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:45,384 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:46,978 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:09:49,386 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:53,469 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:09:53,470 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:09:54,389 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:09:58,390 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:00,392 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:04,393 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:08,395 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:08,735 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:10:08,735 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:10:12,397 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:14,398 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:17,055 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:10:18,400 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:22,402 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:24,051 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:10:24,051 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:10:24,402 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
15:10:28,404 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:30,405 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:32,406 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:37,408 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:39,290 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:10:39,291 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:10:39,409 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:41,410 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:45,412 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:47,133 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:10:47,413 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:49,414 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
15:10:51,415 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:53,416 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:54,480 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:10:54,480 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:10:55,417 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:57,418 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:10:59,419 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:01,420 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:03,421 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:05,422 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:07,423 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:09,424 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir 
modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:09,660 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:11:09,660 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:11:11,425 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:13,417 DEBUG SenderThread:2277729 [sender.py:send():234] send: history +2022-07-30 15:11:13,421 DEBUG SenderThread:2277729 [sender.py:send():234] send: summary +2022-07-30 15:11:13,424 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 15:11:13,425 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json +2022-07-30 15:11:13,426 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:15,426 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:17,207 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:11:24,430 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:24,977 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:11:24,977 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:11:32,433 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:39,436 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:40,456 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:11:40,457 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:11:45,439 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:47,283 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:11:49,441 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:55,443 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:11:55,697 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:11:55,698 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:12:02,447 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:06,448 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:10,991 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:12:10,992 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:12:12,451 INFO Thread-8 
:2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:16,453 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:17,359 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:12:20,455 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:24,456 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:26,422 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:12:26,422 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:12:28,458 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:33,460 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:37,462 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:41,464 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:41,621 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:12:41,621 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status 
+2022-07-30 15:12:45,466 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:47,434 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:12:49,468 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:51,469 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:55,470 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:12:56,891 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:12:56,891 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:12:59,472 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:03,474 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:05,475 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:09,477 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:11,478 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log 
+2022-07-30 15:13:12,122 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:13:12,122 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:13:16,480 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:17,510 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:13:18,481 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:22,483 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:24,484 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:26,485 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:27,315 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:13:27,315 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:13:30,487 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:32,488 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:34,489 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:36,490 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:38,491 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:40,492 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:42,493 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:42,570 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:13:42,571 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:13:44,494 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:46,495 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:47,587 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:13:48,496 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:50,497 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:52,498 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:54,499 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:56,500 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:13:57,800 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:13:57,801 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:13:59,502 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:14:01,503 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:14:03,504 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:14:11,507 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:14:13,137 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:14:13,138 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:14:17,661 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:14:19,511 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:14:25,513 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:14:28,386 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:14:28,387 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:14:32,516 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:14:38,519 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:14:42,521 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:14:43,657 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:14:43,658 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:14:47,735 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:14:48,523 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:14:52,525 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:14:58,528 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:14:58,920 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:14:58,921 DEBUG SenderThread:2277729 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 15:15:03,530 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:07,531 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:11,533 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:14,108 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:15:14,109 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:15:15,535 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:17,809 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:15:21,538 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:25,540 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:27,541 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:29,340 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:15:29,341 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:15:32,543 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:36,544 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:40,546 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:42,547 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:44,549 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:15:44,549 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:15:46,549 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:47,885 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:15:50,551 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:54,553 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:56,554 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:15:59,798 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:15:59,798 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:16:00,556 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:02,557 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:04,558 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:08,560 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:11,561 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:15,040 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:16:15,040 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:16:15,563 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:17,564 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:17,953 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:16:19,565 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:21,566 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:23,567 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:27,569 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:30,738 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:16:30,738 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:16:31,571 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:33,572 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:37,574 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:39,575 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:41,576 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:43,577 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:45,578 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:45,945 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:16:45,945 
DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:16:47,579 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:48,016 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:16:49,580 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:50,678 DEBUG SenderThread:2277729 [sender.py:send():234] send: history +2022-07-30 15:16:50,681 DEBUG SenderThread:2277729 [sender.py:send():234] send: summary +2022-07-30 15:16:50,684 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 15:16:51,581 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json +2022-07-30 15:16:51,581 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:16:53,582 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:17:01,248 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:17:01,248 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:17:01,585 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:17:10,590 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:17:16,592 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:17:16,637 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:17:16,637 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:17:18,081 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:17:22,595 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:17:28,597 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:17:31,977 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:17:31,978 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:17:33,600 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:17:37,601 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:17:43,604 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:17:47,303 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:17:47,303 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:17:47,606 INFO Thread-8 
:2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:17:48,151 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:17:53,608 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:17:57,610 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:01,611 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:02,588 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:18:02,588 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:18:06,614 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:10,615 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:14,617 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:17,854 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:18:17,854 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:18:18,221 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:18:18,619 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:22,621 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:24,622 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:28,624 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:32,626 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:33,059 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:18:33,059 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:18:36,627 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:38,628 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:42,630 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:45,631 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:48,294 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:18:48,298 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 15:18:48,299 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:18:49,634 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:51,634 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:55,636 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:18:57,637 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:01,639 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:03,514 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:19:03,514 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:19:03,640 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:05,641 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:07,642 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:11,644 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:13,645 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:15,646 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:17,648 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:18,371 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:19:18,687 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:19:18,688 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:19:19,649 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:21,650 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:23,651 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:25,651 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:27,652 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:29,653 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:31,654 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:33,655 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:33,870 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:19:33,871 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:19:35,656 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:38,657 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:48,445 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:19:48,662 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:19:49,134 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:19:49,134 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:19:56,665 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:20:04,605 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:20:04,605 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:20:05,670 INFO Thread-8 
:2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:20:11,672 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:20:18,520 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:20:19,884 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:20:19,885 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:20:21,677 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:20:27,679 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:20:31,681 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:20:35,097 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:20:35,098 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:20:37,684 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:20:46,688 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:20:48,595 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:20:50,378 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 15:20:50,378 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:20:50,690 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:20:54,692 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:20:58,694 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:02,696 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:05,549 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:21:05,550 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:21:06,698 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:11,700 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:15,702 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:18,672 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:21:19,704 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:20,763 DEBUG HandlerThread:2277729 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:21:20,763 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:21:23,706 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:29,708 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:33,710 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:35,980 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:21:35,981 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:21:37,712 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:39,713 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:43,715 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:45,716 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:48,748 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:21:50,718 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
15:21:51,217 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:21:51,218 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:21:52,719 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:56,721 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:21:58,722 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:00,723 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:04,725 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:06,443 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:22:06,443 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:22:06,726 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:08,727 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:12,729 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:14,730 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:16,731 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:18,732 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:18,817 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:22:20,733 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:21,645 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:22:21,645 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:22:22,735 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:24,736 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:26,737 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:28,738 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:30,739 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:32,740 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:35,742 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:36,843 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:22:36,843 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:22:37,743 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:39,744 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:40,772 DEBUG SenderThread:2277729 [sender.py:send():234] send: history +2022-07-30 15:22:40,775 DEBUG SenderThread:2277729 [sender.py:send():234] send: summary +2022-07-30 15:22:40,778 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 15:22:41,745 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json +2022-07-30 15:22:41,745 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:43,746 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:48,884 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:22:52,045 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status 
+2022-07-30 15:22:52,045 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:22:52,750 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:22:58,753 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:23:06,756 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:23:07,473 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:23:07,474 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:23:10,758 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:23:16,761 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:23:18,953 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:23:22,806 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:23:22,806 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:23:23,764 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:23:29,767 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:23:33,769 INFO Thread-8 
:2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:23:37,771 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:23:38,146 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:23:38,147 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:23:43,773 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:23:47,775 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:23:49,023 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:23:51,777 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:23:53,354 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:23:53,354 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:23:55,779 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:23:59,781 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:03,782 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log 
+2022-07-30 15:24:08,587 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:24:08,588 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:24:08,784 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:12,786 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:16,789 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:18,790 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:19,093 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:24:22,792 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:23,901 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:24:23,901 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:24:26,794 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:32,797 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:34,798 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:38,800 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:39,124 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:24:39,124 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:24:40,801 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:44,804 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:46,805 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:48,806 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:49,164 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:24:52,808 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:54,410 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:24:54,411 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:24:54,809 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:24:56,810 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:00,812 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:02,813 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:04,814 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:06,815 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:09,570 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:25:09,570 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:25:09,816 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:11,817 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:13,819 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:15,820 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:17,821 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:19,236 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:25:19,822 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:21,823 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:23,824 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:24,729 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:25:24,730 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:25:25,825 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:27,826 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:29,828 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:31,829 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:39,832 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:40,055 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 15:25:40,055 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:25:46,836 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:49,308 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:25:52,838 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:25:55,467 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:25:55,468 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:25:58,841 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:26:06,845 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:26:10,786 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:26:10,786 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:26:10,846 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:26:16,849 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:26:19,377 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:26:21,851 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:26:26,076 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:26:26,077 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:26:27,854 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:26:31,856 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:26:35,858 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:26:39,860 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:26:41,341 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:26:41,342 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:26:43,861 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:26:47,863 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:26:49,450 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:26:51,865 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:26:55,867 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:00,870 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:01,432 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:27:01,432 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:27:04,871 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:08,873 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:12,875 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:14,877 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:16,960 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:27:16,960 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:27:18,878 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:19,525 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:27:22,881 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
15:27:24,882 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:28,883 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:30,885 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:32,136 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:27:32,137 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:27:32,886 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:36,888 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:38,889 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:42,891 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:44,892 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:47,327 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:27:47,328 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:27:47,893 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:49,601 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:27:49,894 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:53,896 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:55,897 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:57,898 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:27:59,899 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:01,900 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:02,586 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:28:02,586 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:28:03,901 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:05,902 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:07,903 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:09,904 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:11,905 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:13,907 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:15,908 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:16,726 DEBUG SenderThread:2277729 [sender.py:send():234] send: history +2022-07-30 15:28:16,730 DEBUG SenderThread:2277729 [sender.py:send():234] send: summary +2022-07-30 15:28:16,734 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 15:28:16,908 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json +2022-07-30 15:28:17,909 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:17,981 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:28:17,981 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:28:18,909 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
15:28:19,676 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:28:27,914 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:33,216 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:28:33,217 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:28:34,917 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:42,921 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:48,564 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:28:48,564 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:28:48,923 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:49,750 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:28:54,926 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:28:58,928 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:03,830 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:29:03,831 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:29:05,931 INFO Thread-8 
:2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:09,933 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:15,936 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:19,089 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:29:19,090 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:29:19,825 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:29:19,937 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:23,939 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:27,941 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:32,943 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:34,344 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:29:34,345 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:29:36,944 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log 
+2022-07-30 15:29:40,946 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:44,948 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:48,950 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:49,680 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:29:49,680 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:29:49,900 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:29:52,951 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:56,953 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:29:58,954 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:02,956 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:04,954 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:30:04,954 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:30:07,959 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:09,960 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:13,962 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:15,963 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:19,965 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:19,978 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:30:20,252 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:30:20,253 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:30:21,966 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:25,967 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:27,969 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:29,970 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:31,971 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:35,502 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:30:35,503 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:30:35,972 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:37,974 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:39,974 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:41,975 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:43,976 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:45,977 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:47,978 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:49,979 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:50,054 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:30:50,682 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: 
stop_status +2022-07-30 15:30:50,682 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:30:51,980 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:53,981 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:55,982 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:30:58,983 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:31:00,984 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:31:02,985 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:31:04,986 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:31:06,000 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:31:06,000 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:31:06,988 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:31:14,991 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:31:20,129 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:31:21,339 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:31:21,339 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:31:22,995 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:31:29,998 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:31:36,001 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:31:36,649 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:31:36,649 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:31:40,003 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:31:46,005 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:31:50,205 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:31:51,961 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:31:51,961 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:31:53,009 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:31:59,012 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:03,014 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:07,015 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:07,213 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:32:07,213 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:32:14,018 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:18,020 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:20,280 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:32:22,022 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:22,483 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:32:22,484 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:32:26,024 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:30,026 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:34,027 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:37,761 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:32:37,761 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:32:38,029 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:40,030 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:44,032 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:49,034 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:50,355 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:32:53,036 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:32:53,074 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:32:53,074 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:32:55,037 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
15:32:59,039 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:03,041 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:05,042 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:07,043 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:08,269 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:33:08,269 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:33:11,045 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:13,046 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:17,048 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:19,049 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:20,429 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:33:22,051 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 
15:33:23,519 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:33:23,520 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:33:26,053 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:28,054 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:30,055 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:32,056 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:34,057 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:36,058 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:38,059 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:38,720 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:33:38,720 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:33:40,060 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:42,061 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:44,062 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:46,063 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:48,064 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:50,065 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:50,503 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:33:52,066 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:53,682 DEBUG SenderThread:2277729 [sender.py:send():234] send: history +2022-07-30 15:33:53,686 DEBUG SenderThread:2277729 [sender.py:send():234] send: summary +2022-07-30 15:33:53,690 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 15:33:54,067 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json +2022-07-30 15:33:54,067 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:33:54,464 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:33:54,464 DEBUG 
SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:33:56,068 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:34:05,073 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:34:09,786 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:34:09,786 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:34:13,076 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:34:19,079 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:34:20,568 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:34:25,081 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:34:25,333 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:34:25,333 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:34:32,084 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:34:36,086 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:34:40,632 DEBUG HandlerThread:2277729 
[handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:34:40,632 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:34:42,089 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:34:46,090 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:34:50,092 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:34:50,635 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:34:55,952 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:34:55,952 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:34:56,095 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:00,097 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:04,098 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:09,100 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:11,193 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:35:11,193 DEBUG SenderThread:2277729 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 15:35:13,102 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:17,104 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:20,704 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:35:21,105 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:25,107 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:26,383 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:35:26,384 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:35:29,109 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:33,111 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:35,111 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:40,113 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:41,617 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:35:41,617 DEBUG SenderThread:2277729 
[sender.py:send_request():248] send_request: stop_status +2022-07-30 15:35:44,115 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:46,116 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:50,118 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:50,781 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:35:52,119 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:56,120 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:35:56,840 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:35:56,841 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:35:58,121 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:02,123 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:04,124 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:06,125 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:08,126 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:12,060 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:36:12,060 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:36:12,127 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:14,128 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:17,130 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:19,131 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:20,858 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:36:21,132 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:23,132 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:25,133 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:27,134 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:27,232 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:36:27,232 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:36:29,135 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:31,136 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:33,137 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:35,138 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:37,139 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:39,140 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:41,141 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:42,517 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:36:42,517 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:36:50,932 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:36:53,146 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:36:57,731 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:36:57,732 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:37:00,149 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:37:06,151 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:37:12,153 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:37:13,096 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:37:13,096 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:37:18,156 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:37:21,008 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:37:24,158 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:37:28,160 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:37:28,479 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:37:28,479 DEBUG SenderThread:2277729 [sender.py:send_request():248] 
send_request: stop_status +2022-07-30 15:37:35,163 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:37:39,165 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:37:43,166 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:37:43,769 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:37:43,770 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:37:47,168 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:37:51,086 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:37:51,170 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:37:55,171 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:37:59,085 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:37:59,085 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:37:59,173 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:03,175 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:08,177 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:12,179 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:14,347 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:38:14,348 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:38:16,180 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:20,182 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:21,161 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:38:22,183 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:26,184 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:29,576 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:38:29,577 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:38:30,186 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:32,187 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:36,189 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:38,189 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:42,191 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:44,192 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:44,811 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:38:44,812 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:38:46,193 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:50,194 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:51,234 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:38:52,195 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:55,196 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:38:59,198 INFO Thread-8 :2277729 
[dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:00,026 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:39:00,026 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:39:01,199 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:03,200 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:05,201 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:07,202 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:09,203 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:11,204 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:13,205 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:15,206 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:15,215 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: stop_status +2022-07-30 15:39:15,215 
DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: stop_status +2022-07-30 15:39:17,207 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:19,208 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:21,309 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:39:23,210 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:25,211 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:27,212 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:28,835 DEBUG SenderThread:2277729 [sender.py:send():234] send: history +2022-07-30 15:39:28,838 DEBUG SenderThread:2277729 [sender.py:send():234] send: summary +2022-07-30 15:39:28,841 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 15:39:29,180 DEBUG SenderThread:2277729 [sender.py:send():234] send: history +2022-07-30 15:39:29,183 DEBUG SenderThread:2277729 [sender.py:send():234] send: summary +2022-07-30 15:39:29,186 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 15:39:29,213 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json +2022-07-30 15:39:29,213 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] 
file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:31,213 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:31,395 DEBUG SenderThread:2277729 [sender.py:send():234] send: telemetry +2022-07-30 15:39:31,395 DEBUG SenderThread:2277729 [sender.py:send():234] send: exit +2022-07-30 15:39:31,395 INFO SenderThread:2277729 [sender.py:send_exit():366] handling exit code: 1 +2022-07-30 15:39:31,397 INFO SenderThread:2277729 [sender.py:send_exit():368] handling runtime: 6205 +2022-07-30 15:39:31,399 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 15:39:31,399 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 15:39:31,400 INFO SenderThread:2277729 [sender.py:send_exit():374] send defer +2022-07-30 15:39:31,400 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:31,400 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: defer +2022-07-30 15:39:31,401 INFO HandlerThread:2277729 [handler.py:handle_request_defer():147] handle defer: 0 +2022-07-30 15:39:31,401 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: defer +2022-07-30 15:39:31,401 INFO SenderThread:2277729 [sender.py:send_request_defer():383] handle sender defer: 0 +2022-07-30 15:39:31,401 INFO SenderThread:2277729 [sender.py:transition_state():387] send defer: 1 +2022-07-30 15:39:31,401 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: defer +2022-07-30 15:39:31,401 INFO HandlerThread:2277729 [handler.py:handle_request_defer():147] handle defer: 1 +2022-07-30 15:39:31,435 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: defer +2022-07-30 15:39:31,435 INFO 
SenderThread:2277729 [sender.py:send_request_defer():383] handle sender defer: 1 +2022-07-30 15:39:31,435 INFO SenderThread:2277729 [sender.py:transition_state():387] send defer: 2 +2022-07-30 15:39:31,435 DEBUG SenderThread:2277729 [sender.py:send():234] send: stats +2022-07-30 15:39:31,435 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: defer +2022-07-30 15:39:31,436 INFO HandlerThread:2277729 [handler.py:handle_request_defer():147] handle defer: 2 +2022-07-30 15:39:31,436 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: defer +2022-07-30 15:39:31,436 INFO SenderThread:2277729 [sender.py:send_request_defer():383] handle sender defer: 2 +2022-07-30 15:39:31,436 INFO SenderThread:2277729 [sender.py:transition_state():387] send defer: 3 +2022-07-30 15:39:31,436 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: defer +2022-07-30 15:39:31,436 INFO HandlerThread:2277729 [handler.py:handle_request_defer():147] handle defer: 3 +2022-07-30 15:39:31,438 DEBUG SenderThread:2277729 [sender.py:send():234] send: summary +2022-07-30 15:39:31,441 INFO SenderThread:2277729 [sender.py:_save_file():939] saving file wandb-summary.json with policy end +2022-07-30 15:39:31,442 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: defer +2022-07-30 15:39:31,442 INFO SenderThread:2277729 [sender.py:send_request_defer():383] handle sender defer: 3 +2022-07-30 15:39:31,442 INFO SenderThread:2277729 [sender.py:transition_state():387] send defer: 4 +2022-07-30 15:39:31,442 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: defer +2022-07-30 15:39:31,442 INFO HandlerThread:2277729 [handler.py:handle_request_defer():147] handle defer: 4 +2022-07-30 15:39:31,442 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: defer +2022-07-30 15:39:31,442 INFO SenderThread:2277729 [sender.py:send_request_defer():383] handle sender defer: 4 +2022-07-30 
15:39:31,502 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 15:39:31,657 INFO SenderThread:2277729 [sender.py:transition_state():387] send defer: 5 +2022-07-30 15:39:31,658 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:31,658 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: defer +2022-07-30 15:39:31,658 INFO HandlerThread:2277729 [handler.py:handle_request_defer():147] handle defer: 5 +2022-07-30 15:39:31,658 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: defer +2022-07-30 15:39:31,658 INFO SenderThread:2277729 [sender.py:send_request_defer():383] handle sender defer: 5 +2022-07-30 15:39:31,658 INFO SenderThread:2277729 [dir_watcher.py:finish():283] shutting down directory watcher +2022-07-30 15:39:31,759 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 15:39:32,214 INFO Thread-8 :2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json +2022-07-30 15:39:32,214 INFO SenderThread:2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/config.yaml +2022-07-30 15:39:32,215 INFO SenderThread:2277729 [dir_watcher.py:_on_file_modified():230] file/dir modified: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:32,215 INFO SenderThread:2277729 [dir_watcher.py:finish():313] scan: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files +2022-07-30 15:39:32,215 INFO SenderThread:2277729 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/config.yaml config.yaml +2022-07-30 15:39:32,215 INFO SenderThread:2277729 [dir_watcher.py:finish():327] scan save: 
/data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/diff.patch diff.patch +2022-07-30 15:39:32,215 INFO SenderThread:2277729 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/requirements.txt requirements.txt +2022-07-30 15:39:32,216 INFO SenderThread:2277729 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log output.log +2022-07-30 15:39:32,216 INFO SenderThread:2277729 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json wandb-summary.json +2022-07-30 15:39:32,217 INFO SenderThread:2277729 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-metadata.json wandb-metadata.json +2022-07-30 15:39:32,225 INFO SenderThread:2277729 [dir_watcher.py:finish():327] scan save: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/code/run_flax_speech_recognition_ctc.py code/run_flax_speech_recognition_ctc.py +2022-07-30 15:39:32,225 INFO SenderThread:2277729 [sender.py:transition_state():387] send defer: 6 +2022-07-30 15:39:32,225 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:32,229 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: defer +2022-07-30 15:39:32,229 INFO HandlerThread:2277729 [handler.py:handle_request_defer():147] handle defer: 6 +2022-07-30 15:39:32,229 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: defer +2022-07-30 15:39:32,229 INFO SenderThread:2277729 [sender.py:send_request_defer():383] handle sender defer: 6 +2022-07-30 15:39:32,229 INFO SenderThread:2277729 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 15:39:32,327 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 15:39:32,328 DEBUG 
SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:32,429 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 15:39:32,430 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:32,531 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 15:39:32,532 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:32,633 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 15:39:32,634 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:32,730 INFO Thread-15 :2277729 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/requirements.txt +2022-07-30 15:39:32,735 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 15:39:32,735 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:32,741 INFO Thread-14 :2277729 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/config.yaml +2022-07-30 15:39:32,764 INFO Thread-17 :2277729 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/wandb-summary.json +2022-07-30 15:39:32,837 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 15:39:32,837 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:32,922 INFO Thread-16 :2277729 [upload_job.py:push():137] Uploaded file /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/files/output.log +2022-07-30 15:39:32,939 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 
15:39:32,939 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:33,041 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 15:39:33,041 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:33,123 INFO Thread-7 :2277729 [sender.py:transition_state():387] send defer: 7 +2022-07-30 15:39:33,123 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: defer +2022-07-30 15:39:33,123 INFO HandlerThread:2277729 [handler.py:handle_request_defer():147] handle defer: 7 +2022-07-30 15:39:33,124 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: defer +2022-07-30 15:39:33,124 INFO SenderThread:2277729 [sender.py:send_request_defer():383] handle sender defer: 7 +2022-07-30 15:39:33,143 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 15:39:34,381 INFO SenderThread:2277729 [sender.py:transition_state():387] send defer: 8 +2022-07-30 15:39:34,382 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:34,382 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: defer +2022-07-30 15:39:34,383 INFO HandlerThread:2277729 [handler.py:handle_request_defer():147] handle defer: 8 +2022-07-30 15:39:34,383 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: defer +2022-07-30 15:39:34,383 INFO SenderThread:2277729 [sender.py:send_request_defer():383] handle sender defer: 8 +2022-07-30 15:39:34,383 INFO SenderThread:2277729 [sender.py:transition_state():387] send defer: 9 +2022-07-30 15:39:34,383 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: defer +2022-07-30 15:39:34,384 INFO HandlerThread:2277729 [handler.py:handle_request_defer():147] handle defer: 9 +2022-07-30 15:39:34,384 DEBUG SenderThread:2277729 [sender.py:send():234] send: final 
+2022-07-30 15:39:34,384 DEBUG SenderThread:2277729 [sender.py:send():234] send: footer +2022-07-30 15:39:34,384 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: defer +2022-07-30 15:39:34,384 INFO SenderThread:2277729 [sender.py:send_request_defer():383] handle sender defer: 9 +2022-07-30 15:39:34,484 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: poll_exit +2022-07-30 15:39:34,484 DEBUG SenderThread:2277729 [sender.py:send_request():248] send_request: poll_exit +2022-07-30 15:39:34,484 INFO SenderThread:2277729 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 15:39:34,744 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: get_summary +2022-07-30 15:39:34,749 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: sampled_history +2022-07-30 15:39:34,750 DEBUG HandlerThread:2277729 [handler.py:handle_request():130] handle_request: shutdown +2022-07-30 15:39:34,750 INFO HandlerThread:2277729 [handler.py:finish():731] shutting down handler +2022-07-30 15:39:35,384 INFO WriterThread:2277729 [datastore.py:close():281] close: /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/run-y1b5rbiq.wandb +2022-07-30 15:39:35,742 INFO SenderThread:2277729 [sender.py:finish():1070] shutting down sender +2022-07-30 15:39:35,743 INFO SenderThread:2277729 [file_pusher.py:finish():177] shutting down file pusher +2022-07-30 15:39:35,743 INFO SenderThread:2277729 [file_pusher.py:join():182] waiting for file pusher +2022-07-30 15:39:35,746 INFO MainThread:2277729 [internal.py:handle_exit():77] Internal process exited diff --git a/wandb/run-20220730_135604-y1b5rbiq/logs/debug.log b/wandb/run-20220730_135604-y1b5rbiq/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..b0afc77472903f407fa6605df693dbafae082e0d --- /dev/null +++ b/wandb/run-20220730_135604-y1b5rbiq/logs/debug.log @@ -0,0 +1,150 @@ +2022-07-30 13:56:04,286 INFO 
MainThread:2276371 [wandb_setup.py:_flush():71] setting env: {'project': 'wav2vec2', 'entity': 'NbAiLab'} +2022-07-30 13:56:04,286 INFO MainThread:2276371 [wandb_setup.py:_flush():71] setting login settings: {} +2022-07-30 13:56:04,286 INFO MainThread:2276371 [wandb_init.py:_log_setup():371] Logging user logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/logs/debug.log +2022-07-30 13:56:04,286 INFO MainThread:2276371 [wandb_init.py:_log_setup():372] Logging internal logs to /data/wav2vec2-1b-npsc-nst/wandb/run-20220730_135604-y1b5rbiq/logs/debug-internal.log +2022-07-30 13:56:04,286 INFO MainThread:2276371 [wandb_init.py:init():404] calling init triggers +2022-07-30 13:56:04,286 INFO MainThread:2276371 [wandb_init.py:init():409] wandb.init called with sweep_config: {} +config: {} +2022-07-30 13:56:04,286 INFO MainThread:2276371 [wandb_init.py:init():460] starting backend +2022-07-30 13:56:04,286 INFO MainThread:2276371 [backend.py:_multiprocessing_setup():99] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2022-07-30 13:56:04,339 INFO MainThread:2276371 [backend.py:ensure_launched():216] starting backend process... +2022-07-30 13:56:04,382 INFO MainThread:2276371 [backend.py:ensure_launched():221] started backend process with pid: 2277729 +2022-07-30 13:56:04,384 INFO MainThread:2276371 [wandb_init.py:init():469] backend started and connected +2022-07-30 13:56:04,398 INFO MainThread:2276371 [wandb_init.py:init():533] updated telemetry +2022-07-30 13:56:04,503 INFO MainThread:2276371 [wandb_init.py:init():563] communicating current version +2022-07-30 13:56:05,225 INFO MainThread:2276371 [wandb_init.py:init():568] got version response upgrade_message: "wandb version 0.12.21 is available! 
To upgrade, please run:\n $ pip install wandb --upgrade" + +2022-07-30 13:56:05,225 INFO MainThread:2276371 [wandb_init.py:init():578] communicating run to backend with 30 second timeout +2022-07-30 13:56:05,409 INFO MainThread:2276371 [wandb_init.py:init():606] starting run threads in backend +2022-07-30 13:56:07,930 INFO MainThread:2276371 [wandb_run.py:_console_start():1810] atexit reg +2022-07-30 13:56:07,930 INFO MainThread:2276371 [wandb_run.py:_redirect():1684] redirect: SettingsConsole.REDIRECT +2022-07-30 13:56:07,931 INFO MainThread:2276371 [wandb_run.py:_redirect():1689] Redirecting console. +2022-07-30 13:56:07,933 INFO MainThread:2276371 [wandb_run.py:_redirect():1745] Redirects installed. +2022-07-30 13:56:07,933 INFO MainThread:2276371 [wandb_init.py:init():633] run started, returning control to user process +2022-07-30 15:39:28,849 INFO MainThread:2276371 [wandb_run.py:_atexit_cleanup():1780] got exitcode: 1 +2022-07-30 15:39:29,174 INFO MainThread:2276371 [wandb_run.py:_restore():1752] restore +2022-07-30 15:39:31,401 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 74351 +} + +2022-07-30 15:39:31,658 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 2 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 74351 +} + +2022-07-30 15:39:32,226 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 400083 +} + +2022-07-30 15:39:32,329 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 74351 + total_bytes: 400083 +} + +2022-07-30 15:39:32,431 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got 
exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 400083 + total_bytes: 400083 +} + +2022-07-30 15:39:32,532 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 400083 + total_bytes: 400083 +} + +2022-07-30 15:39:32,634 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 400083 + total_bytes: 400083 +} + +2022-07-30 15:39:32,736 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 400083 + total_bytes: 400083 +} + +2022-07-30 15:39:32,838 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 400083 + total_bytes: 400083 +} + +2022-07-30 15:39:32,940 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 400083 + total_bytes: 400083 +} + +2022-07-30 15:39:33,042 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 400083 + total_bytes: 400083 +} + +2022-07-30 15:39:34,383 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got exit ret: file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 400083 + total_bytes: 400083 +} + +2022-07-30 15:39:34,743 INFO MainThread:2276371 [wandb_run.py:_wait_for_finish():1912] got exit ret: done: true +exit_result { +} +file_counts { + wandb_count: 6 + other_count: 1 +} +pusher_stats { + uploaded_bytes: 400083 + total_bytes: 400083 +} +local_info { +} + +2022-07-30 15:39:36,428 INFO MainThread:2276371 
[wandb_run.py:_append_history():2130] rendering history +2022-07-30 15:39:36,428 INFO MainThread:2276371 [wandb_run.py:_append_summary():2085] rendering summary +2022-07-30 15:39:36,429 INFO MainThread:2276371 [wandb_run.py:_append_files():2180] logging synced files diff --git a/wandb/run-20220730_135604-y1b5rbiq/run-y1b5rbiq.wandb b/wandb/run-20220730_135604-y1b5rbiq/run-y1b5rbiq.wandb new file mode 100644 index 0000000000000000000000000000000000000000..b0633fd56489a89073b8644ab56eb9287268163a --- /dev/null +++ b/wandb/run-20220730_135604-y1b5rbiq/run-y1b5rbiq.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db402d0277fed45bac85e9335ca1cef51f9db3b99416ba9b9db31b7b619b4d1d +size 1437107 diff --git a/wandb/run-20220730_174606-j2u4n7h4/files/code/run_flax_speech_recognition_ctc.py b/wandb/run-20220730_174606-j2u4n7h4/files/code/run_flax_speech_recognition_ctc.py new file mode 100644 index 0000000000000000000000000000000000000000..11df8fb90ea54a20f8f34bbb40442193e151ddc2 --- /dev/null +++ b/wandb/run-20220730_174606-j2u4n7h4/files/code/run_flax_speech_recognition_ctc.py @@ -0,0 +1,1604 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the Flax library models for connectionist temporal classification (CTC) speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. 
Pointers for this are left as comments. + +import logging +import math +import os +import re +import sys +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Union + +import datasets +import numpy as np +from datasets import DatasetDict, load_dataset, load_metric +from tqdm import tqdm + +import flax +import jax +import jax.numpy as jnp +import optax +import transformers +import wandb as wandb +from flax import core, jax_utils, struct, traverse_util +from flax.jax_utils import unreplicate, pad_shard_unpad +from flax.training.common_utils import get_metrics, shard, shard_prng_key +from huggingface_hub import Repository +from models import Wav2Vec2Config, FlaxWav2Vec2ForCTC +from optax._src import linear_algebra +from transformers import ( + AutoFeatureExtractor, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + TrainingArguments, + is_tensorboard_available, + set_seed, +) +from transformers.file_utils import get_full_repo_name +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@flax.struct.dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + feature_extractor_name: Optional[str] = field( + default=None, metadata={"help": "feature extractor name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"}, + ) + use_fast_tokenizer: bool = field( + default=True, + metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + freeze_feature_encoder: bool = field( + default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} + ) + attention_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."} + ) + activation_dropout: float = field( + default=0.1, + metadata={ + "help": "The hidden activation dropout probability in the embeddings, encoder, and pooler." + }, + ) + hidden_dropout: float = field( + default=0.1, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, + metadata={ + "help": "The feat proj dropout probability for feature encoder representations." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.1, + metadata={ + "help": "The spec aug dropout probability for feature encoder representations." + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis." + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."}) + ctc_loss_reduction: Optional[str] = field( + default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."} + ) + ctc_zero_infinity: Optional[bool] = field( + default=False, metadata={"help": "If True, will try yo aboud the CTC loss goinf to infinity."} + ) + + +@flax.struct.dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_label_length: Optional[int] = field( + default=512, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + min_label_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + pad_input_to_multiple_of: Optional[int] = field( + default=32000, + metadata={ + "help": "If set will pad the input sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + pad_target_to_multiple_of: Optional[int] = field( + default=None, + metadata={ + "help": "If set will pad the target sequence to a multiple of the provided value. " + "This is important to avoid triggering recompilations on TPU." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). 
Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="flax-speech-recognition-ctc", + metadata={"help": "The name of the wandb project."}, + ) + wandb_name: str = field( + default=None, + metadata={"help": "The name of the wandb run."}, + ) + wandb_job_type: str = field( + default="CTC", + metadata={"help": "The name of the wandb job type."}, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + remove_punctuation: bool = field( + default=False, metadata={"help": "Whether or not to remove punctuation during training."} + ) + + +# @flax.struct.dataclass +@dataclass +class FlaxTrainingArguments(TrainingArguments): + precision: str = field( + default="full", + metadata={ + "help": "Whether to enable mixed-precision training. If true, the optimizer is stored in half-precision (bfloat16) and computations are executed in half-precision" + "**Note that this only specifies the dtype of the computation and optimizer state. It does not influence the dtype of model parameters.**" + }, + ) + matmul_precision: str = field( + default="default", + metadata={ + "help": "Default floating-point precision of internal computations used in TPU matrix multiplications and convolutions. " + "This configuration option controls the default precision for JAX operations that take an optional precision argument (e.g. `lax.conv_general_dilated` and `lax.dot`). " + "This configuration option does not change the behaviours of such calls with explicit precision arguments; " + "it only changes the behaviors of calls with no such argument provided. 
" + "One of `['highest', 'float32', 'high', 'bfloat16_3x', 'default', 'bfloat16', 'fastest', None]`." + }, + ) + multisteps: bool = field( + default=False, + metadata={ + "help": "Whether to use Optax MultiSteps for gradient accumulation. If `False` (default) and `gradient_accumulation_steps > 1`, " + "a custom gradient accumulation implementation will be employed." + }, + ) + + +def to_fp32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + + +def to_bf16(t): + return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) + + +class MixedPrecisionTrainState(struct.PyTreeNode): + """Train state for use with a single Optax optimizer. + Adapted from flax train_state https://github.com/google/flax/blob/main/flax/training/train_state.py + + Synopsis:: + + state = TrainState.create( + apply_fn=model.apply, + params=variables['params'], + tx=tx) + grad_fn = jax.grad(make_loss_fn(state.apply_fn)) + for batch in data: + grads = grad_fn(state.params, batch) + state = state.apply_gradients(grads=grads) + + Args: + step: Counter starts at 0 and is incremented by every call to + `.apply_gradients()`. + apply_fn: Usually set to `model.apply()`. Kept in this dataclass for + convenience to have a shorter params list for the `train_step()` function + in your training loop. + params: The parameters to be updated by `tx` and used by `apply_fn`. + tx: An Optax gradient transformation. + opt_state: The state for `tx`. + dropout_rng: PRNG key for stochastic operations. + bf16: Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. 
+ """ + + step: int + apply_fn: Callable = struct.field(pytree_node=False) + get_attention_mask_fn: Callable = struct.field(pytree_node=False) + params: core.FrozenDict[str, Any] + tx: optax.GradientTransformation = struct.field(pytree_node=False) + opt_state: optax.OptState + dropout_rng: jnp.ndarray + max_grad_norm: Optional[float] = 1.0 + + def apply_gradients(self, *, grads, to_dtype, **kwargs): + """Updates `step`, `params`, `opt_state` and `**kwargs` in return value. + + Note that internally this function calls `.tx.update()` followed by a call + to `optax.apply_updates()` to update `params` and `opt_state`. + + Args: + grads: Gradients that have the same pytree structure as `.params`. + **kwargs: Additional dataclass attributes that should be `.replace()`-ed. + + Returns: + An updated instance of `self` with `step` incremented by one, `params` + and `opt_state` updated by applying `grads`, and additional attributes + replaced as specified by `kwargs`. + """ + + # clip gradients by global l2 norm + casted_max_grad_norm = to_dtype(self.max_grad_norm) + g_norm = linear_algebra.global_norm(grads) + g_norm = jnp.maximum(casted_max_grad_norm, g_norm) + grads = jax.tree_map(lambda t: (t / g_norm) * casted_max_grad_norm, grads) + + # perform update step in fp32 and subsequently downcast optimizer states if mixed precision training + # grads and opt_state in bf16 (need to upcast), params in fp32 (leave as is) + updates, new_opt_state = self.tx.update(to_fp32(grads), to_fp32(self.opt_state), self.params) + + new_params = optax.apply_updates(self.params, updates) + return self.replace( + step=self.step + 1, + params=new_params, + opt_state=to_dtype(new_opt_state), + **kwargs, + ) + + @classmethod + def create(cls, *, apply_fn, params, tx, to_dtype, **kwargs): + """Creates a new instance with `step=0` and initialized `opt_state`.""" + # downcast optimizer state to bf16 if mixed-precision training + opt_state = tx.init(to_dtype(params)) if tx is not None else None + 
return cls( + step=0, + apply_fn=apply_fn, + params=params, + tx=tx, + opt_state=opt_state, + **kwargs, + ) + + def replicate(self): + return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng)) + + +@flax.struct.dataclass +class FlaxDataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`Wav2Vec2Processor`]) + The processor used for proccessing the data. + decoder_start_token_id (:obj: `int`) + The begin-of-sentence of the decoder. + input_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned input sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + target_padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned target sequences (according to the model's padding side and padding index). + See above for details. + max_input_length (:obj:`float`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + pad_input_to_multiple_of (:obj:`int`, `optional`): + If set will pad the input sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ pad_target_to_multiple_of (:obj:`int`, `optional`): + If set will pad the target sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + """ + + processor: Any + input_padding: Union[bool, str] = "longest" + label_padding: Union[bool, str] = "max_length" + pad_input_to_multiple_of: Optional[int] = None + pad_to_multiple_of_label: Optional[int] = None + max_input_length: Optional[float] = None + max_label_length: Optional[float] = None + + def __call__(self, features: List[Dict[str, Union[List[int], np.ndarray]]]) -> Dict[str, np.ndarray]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + input_features = [{"input_values": feature["input_values"]} for feature in features] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + # reformat list to dict and set to pytorch format + batch = self.processor.feature_extractor.pad( + input_features, + max_length=self.max_input_length, + padding=self.input_padding, + pad_to_multiple_of=self.pad_input_to_multiple_of, + return_tensors="np", + ) + + labels_batch = self.processor.tokenizer.pad( + label_features, + max_length=self.max_label_length, + padding=self.label_padding, + pad_to_multiple_of=self.pad_to_multiple_of_label, + return_tensors="np", + ) + + labels = labels_batch["input_ids"] + labels = np.ma.array(labels, mask=np.not_equal(labels_batch.attention_mask, 1)) + labels = labels.filled(fill_value=-100) + + batch["labels"] = labels + + return batch + + +def get_grouped_indices( + dataset, batch_size: int, rng: Optional[List[int]] = None, mega_batch_mult: Optional[int] = None +) -> np.array: + """ + Adapted from the `get_length_grouped_indices` function in the PyTorch Trainer utils file (https://github.com/huggingface/transformers/blob/main/src/transformers/trainer_pt_utils.py#L486) + Function that returns a list of 
indices in which each slice of `batch_size` consecutive indices correspond to elements of similar + lengths. To do this, the indices are: + + - randomly permuted (if a JAX rng is specified) + - grouped in mega-batches of size `mega_batch_mult * batch_size` + - sorted by length in each mega-batch + + The result is the concatenation of all mega-batches, with the batch of `batch_size` containing the element of + maximum length placed first, so that an OOM happens sooner rather than later. + """ + lengths = dataset["input_length"] + + # Default for mega_batch_mult: 50 or the number to get 4 megabatches, whichever is smaller. + if mega_batch_mult is None: + mega_batch_mult = min(len(lengths) // (batch_size * 4), 50) + # Just in case, for tiny datasets + if mega_batch_mult == 0: + mega_batch_mult = 1 + + # We need to use JAX for the random permutation as the PRNG key will be set based on the seed outside of the sampler. + num_samples = len(lengths) + indices = jax.random.permutation(rng, np.arange(num_samples)) if rng is not None else np.arange(num_samples) + + megabatch_size = mega_batch_mult * batch_size + megabatches = [indices[i : i + megabatch_size].tolist() for i in range(0, len(lengths), megabatch_size)] + megabatches = [list(sorted(megabatch, key=lambda i: lengths[i], reverse=True)) for megabatch in megabatches] + + # The rest is to get the biggest batch first. 
+ # Since each megabatch is sorted by descending length, the longest element is the first + megabatch_maximums = [lengths[megabatch[0]] for megabatch in megabatches] + max_idx = np.argmax(megabatch_maximums).item() + # Switch to put the longest batch in first position + # (note that this is different to the PT grouped sampler in which we only put the longest element in the first position, and not its batch) + megabatches[0], megabatches[max_idx] = megabatches[max_idx], megabatches[0] + + megabatches = np.array([i for megabatch in megabatches for i in megabatch]) + + return megabatches + + +def generate_batch_splits(samples_idx: np.ndarray, batch_size: int, drop_last=True) -> np.ndarray: + """Generate batches of data for a specified batch size from sample indices. If the dataset size is not divisible by + the batch size and `drop_last` is `True`, the last incomplete batch is dropped. Else, it is returned.""" + num_samples = len(samples_idx) + if drop_last: + samples_to_remove = num_samples % batch_size + if samples_to_remove != 0: + samples_idx = samples_idx[:-samples_to_remove] + sections_split = num_samples // batch_size + samples_idx = samples_idx.reshape((sections_split, batch_size)) + else: + sections_split = math.ceil(num_samples / batch_size) + samples_idx = np.array_split(samples_idx, sections_split) + return samples_idx + + +def write_train_metric(summary_writer, train_metrics, train_time, step): + summary_writer.scalar("train_time", train_time, step) + + train_metrics = get_metrics(train_metrics) + for key, vals in train_metrics.items(): + tag = f"train_{key}" + for i, val in enumerate(vals): + summary_writer.scalar(tag, val, step - len(vals) + i + 1) + + +def write_eval_metric(summary_writer, eval_metrics, step, pred_str=None): + for metric_name, value in eval_metrics.items(): + summary_writer.scalar(f"eval_{metric_name}", value, step) + + if pred_str is not None: + # write output actual predictions for debugging + summary_writer.text("eval_predictions", 
"\n".join(pred_str), step) + + +def write_wandb_log(metrics, step, prefix=None): + if jax.process_index() == 0: + log_metrics = {} + for k, v in metrics.items(): + if "layer" in k: + log_metrics[f"{k}/"] = v + elif prefix is not None: + log_metrics[f"{prefix}/{k}"] = v + else: + log_metrics[k] = v + wandb.log(log_metrics, step) + + +def write_wandb_pred(pred_str, label_str, step, num_log=50, prefix="eval"): + if jax.process_index() == 0: + # convert str data to a wandb compatible format + str_data = [[label_str[i], pred_str[i]] for i in range(len(pred_str))] + # we'll log the first 50 predictions for each epoch + wandb.log( + { + f"{prefix}/step_{int(step / 1000)}k": wandb.Table( + columns=["label_str", "pred_str"], data=str_data[:num_log] + ) + }, + step, + ) + + +def create_learning_rate_fn( + num_train_steps: int, num_warmup_steps: int, learning_rate: float +) -> Callable[[int], jnp.array]: + """Returns a linear warmup, linear_decay learning rate function.""" + warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps) + decay_fn = optax.linear_schedule( + init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps + ) + schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps]) + return schedule_fn + + +def ctc_loss( + logits, + logits_attention_mask, + labels, + blank_id, + loss_reduction="mean", + output_emission_dict=False, + log_epsilon=-100000.0, +): + """Computes CTC loss. + This function performs forward computation over an FSA with `N * 2` states + where `N` is the max number of labels. The states are split into two groups: + Phi states and emission states. a phi-state accepts repetition of + phi (blank)-symbols and transits to emission state when the correct label is + observed. An emission state accepts repetition of the label and transits to + the next phi states at any time (so called epsilon-transition). 
+ Below, `B` denotes the batch size, `T` denotes the time steps in `logits`, + and `N` denotes the time steps in `labels`. + Args: + logits: (B, T, K)-array containing log-probabilities of each class. + logitpaddings: (B, T)-array. Padding indicators for `logits`. + labels: (B, N)-array containing reference integer labels. + labelpaddings: (B, N)-array. Padding indicators for `labels`. Currently, + `labels` must be right-padded, i.e. each row of `labelpaddings` must be + repetition of zeroes, followed by repetition of ones. + blank_id: Id for blank token. + loss_reduction: one of "mean", "sum", "default" + - "none": no reduction is applied. + - "mean": output loss will be divided by target lengths and then the + mean over the batch is taken. + - "sum": output loss are summed over batch + output_emission_dict: whether to output additional information about the emission probs + Returns: + A pair of `(per_seq_loss, aux)`. + per_seq_loss: + (B,)-array containing loss values for each sequence in the batch. + aux: Dictionary containing interim variables used for computing losses. + aux['logalpha_phi']: (T, B, N+1)-array. Log-forward-probabilities of each + phi-state corresponding to the n-th label. + aux['logalpha_emit']: (T, B, N)-array. Log-forward-probabilities of each + emission-state corresponding to the n-th label. + aux['logprobs_phi']: (T, B, 1)-array. Probability of the phi-symbol + corresponding to each time frame. + aux['logprobs_emit']: (T, B, N)-array. Probability of the n-th label + corresponding to each time frame. 
+ """ + # label paddings are indicated by -100 + labelpaddings = labels < 0 + # logit paddings are the inverse of attention_mask + logitpaddings = ~logits_attention_mask + + # Copied from https://github.com/tensorflow/lingvo/blob/master/lingvo/jax/layers/ctc_objectives.py + batchsize, unused_maxinputlen, num_classes = logits.shape + batchsize_, maxlabellen = labels.shape + + logprobs = jax.nn.log_softmax(logits) + labellens = maxlabellen - jnp.sum(labelpaddings, axis=1).astype(jnp.int32) + + # repeat[b, n] == 1.0 when label[b, n] == label[b, n+1]. + repeat = (labels[:, :-1] == labels[:, 1:]).astype(jnp.float32) + repeat = jnp.pad(repeat, ((0, 0), (0, 1))) + + logprobs_phi = logprobs[:, :, blank_id : blank_id + 1] # [B, T, 1] + logprobs_phi = jnp.transpose(logprobs_phi, (1, 0, 2)) # [T, B, 1] + + one_hot = jax.nn.one_hot(labels, num_classes=num_classes) # [B, N, K] + logprobs_emit = jnp.einsum("btk,bnk->btn", logprobs, one_hot) + logprobs_emit = jnp.transpose(logprobs_emit, (1, 0, 2)) # [T, B, N] + + logalpha_phi_init = jnp.ones((batchsize, maxlabellen + 1)) * log_epsilon # [B, N] + logalpha_phi_init = logalpha_phi_init.at[:, 0].set(0.0) + logalpha_emit_init = jnp.ones((batchsize, maxlabellen)) * log_epsilon # [B, N] + + def loop_body(prev, x): + prev_phi, prev_emit = prev + # emit-to-phi epsilon transition, except if the next label is repetition + prev_phi_orig = prev_phi + prev_phi = prev_phi.at[:, 1:].set(jnp.logaddexp(prev_phi[:, 1:], prev_emit + log_epsilon * repeat)) + + logprob_emit, logprob_phi, pad = x + + # phi-to-emit transition + next_emit = jnp.logaddexp(prev_phi[:, :-1] + logprob_emit, prev_emit + logprob_emit) + # self-loop transition + next_phi = prev_phi + logprob_phi + # emit-to-phi blank transition only when the next label is repetition + next_phi = next_phi.at[:, 1:].set( + jnp.logaddexp(next_phi[:, 1:], prev_emit + logprob_phi + log_epsilon * (1.0 - repeat)) + ) + + pad = pad.reshape((batchsize, 1)) + next_emit = pad * prev_emit + (1.0 - pad) * 
next_emit + next_phi = pad * prev_phi_orig + (1.0 - pad) * next_phi + + return (next_phi, next_emit), (next_phi, next_emit) + + xs = (logprobs_emit, logprobs_phi, logitpaddings.transpose((1, 0))) + _, (logalpha_phi, logalpha_emit) = jax.lax.scan(loop_body, (logalpha_phi_init, logalpha_emit_init), xs) + + # last row needs to be updated with the last epsilon transition + logalpha_phi_last = logalpha_phi[-1].at[:, 1:].set(jnp.logaddexp(logalpha_phi[-1, :, 1:], logalpha_emit[-1])) + logalpha_phi = logalpha_phi.at[-1].set(logalpha_phi_last) + + # extract per_seq_loss + one_hot = jax.nn.one_hot(labellens, num_classes=maxlabellen + 1) # [B, N+1] + per_seq_loss = -jnp.einsum("bn,bn->b", logalpha_phi_last, one_hot) + + if loss_reduction == "mean": + target_lengths = labelpaddings.shape[-1] - labelpaddings.sum(axis=-1) + loss = (per_seq_loss / target_lengths).mean() + elif loss_reduction == "sum": + loss = per_seq_loss.sum() + else: + loss = per_seq_loss + + if not output_emission_dict: + return loss + + return loss, { + "logalpha_phi": logalpha_phi, + "logalpha_emit": logalpha_emit, + "logprobs_phi": logprobs_phi, + "logprobs_emit": logprobs_emit, + } + + +def make_dataset(data_args, seed=42): + # Pre-processing dataset + import re + + def map_nst(entry): + text = entry["text"].lower() + text = text.replace("(...vær stille under dette opptaket...)", "") + text = re.sub('[áàâ]', 'a', text) + text = re.sub('[ä]', 'æ', text) + text = re.sub('[éèëê]', 'e', text) + text = re.sub('[íìïî]', 'i', text) + text = re.sub('[óòöô]', 'o', text) + text = re.sub('[ö]', 'ø', text) + text = re.sub('[ç]', 'c', text) + text = re.sub('[úùüû]', 'u', text) + # text = re.sub('\\(?=(Punktum|Komma|Utropstegn|Spørsmålstegn))', ' ', text) + text = re.sub('\s+', ' ', text) + return {"text": text} + + def filter_nst(entry): + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.match(entry["type"], "pIW|CA"): + 
return False # Spelling out words + return True + + def filter_npsc(entry): + # False if there are digits in the text + if not ((len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)): + return False # Too short + if re.search("\d", entry["text"]): + return False + return True + + def map_npsc(entry): + batch = {"text": entry["text"].lower()} + batch["text"] = re.sub('[áàâ]', 'a', batch["text"]) + batch["text"] = re.sub('[ä]', 'æ', batch["text"]) + batch["text"] = re.sub('[éèëê]', 'e', batch["text"]) + batch["text"] = re.sub('[íìïî]', 'i', batch["text"]) + batch["text"] = re.sub('[óòöô]', 'o', batch["text"]) + batch["text"] = re.sub('[ö]', 'ø', batch["text"]) + batch["text"] = re.sub('[ç]', 'c', batch["text"]) + batch["text"] = re.sub('[úùüû]', 'u', batch["text"]) + batch["text"] = re.sub('\s', ' ', batch["text"]) + batch["text"] = re.sub('', 'eee', batch["text"]) + batch["text"] = re.sub('', 'qqq', batch["text"]) + batch["text"] = re.sub('', 'mmm', batch["text"]) + batch["text"] = re.sub('', 'xxx', batch["text"]) + # batch["text"] = re.sub('', '?', batch["text"]) + if "<" in batch["text"]: + raise ValueError(batch["text"]) + return batch + + nst = datasets.load_dataset("NbAiLab/NST", "no-close") + npsc = datasets.load_dataset("NbAiLab/NPSC", "16K_mp3") + # TODO NST_hesitate + + split = len(npsc[data_args.train_split_name]) / (len(npsc[data_args.train_split_name]) + len(npsc[data_args.eval_split_name])) # Use same train/val ratio as NPSC + nst_train = nst[data_args.train_split_name].train_test_split(train_size=split, seed=seed) + nst[data_args.train_split_name] = nst_train["train"] + nst[data_args.eval_split_name] = nst_train["test"] + + nst = nst.filter(filter_nst).map( + map_nst, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NST", + ).shuffle(seed=seed) + npsc = npsc.filter(filter_npsc).map( + map_npsc, + num_proc=data_args.preprocessing_num_workers, + desc="filtering NPSC", + ).shuffle(seed=seed) + + 
npsc_base = npsc.remove_columns([col for col in npsc[data_args.train_split_name].column_names if col not in ["text", "audio"]]) + nst_base = nst.remove_columns([col for col in nst[data_args.train_split_name].column_names if col not in ["text", "audio"]]) + + combined = {} + for split in data_args.train_split_name, data_args.eval_split_name, data_args.test_split_name: + probs = np.array([len(nst_base[split]), len(npsc_base[split])]) # Weight by number of examples + probs = (probs / probs.sum()).tolist() + comb = datasets.interleave_datasets([nst_base[split], npsc_base[split]], probabilities=probs, seed=seed) + combined[split] = comb + + return datasets.DatasetDict(**combined) + +def main(): + # 1. Parse input arguments + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, FlaxTrainingArguments)) + + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # 2. Setup logging + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + # Set the verbosity to info of the Transformers logger. + # We only want one process per machine to log things on the screen. 
+ logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR) + if jax.process_index() == 0: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + + # Set up wandb run + if jax.process_index() == 0: + wandb.init(project=data_args.wandb_project, name=data_args.wandb_name, job_type=data_args.wandb_job_type) + + logger.info("Training/evaluation parameters %s", training_args) + + # Set the default TPU matmul precision and display the number of devices + jax.config.update("jax_default_matmul_precision", training_args.matmul_precision) + logger.info(f"JAX devices: {jax.device_count()}, matmul precision: {training_args.matmul_precision}") + + # 4. Load dataset + + set_seed(training_args.seed) + raw_datasets = make_dataset(data_args, seed=training_args.seed) + + # raw_datasets = DatasetDict() + + # if training_args.do_train: + # raw_datasets[data_args.train_split_name] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.train_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_eval: + # raw_datasets[data_args.eval_split_name] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=data_args.eval_split_name, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + # if training_args.do_predict: + # test_split = data_args.test_split_name.split("+") + # for split in test_split: + # raw_datasets[split] = load_dataset( + # data_args.dataset_name, + # data_args.dataset_config_name, + # split=split, + # cache_dir=data_args.dataset_cache_dir, + # use_auth_token=True if model_args.use_auth_token else None, + # ) + + if not training_args.do_train and not training_args.do_eval and 
not training_args.do_predict: + raise ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 5. 
Load pretrained model, tokenizer, and feature extractor + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + config = Wav2Vec2Config.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + # update config according to training args, model args, and tokenizer attributes + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "ctc_zero_infinity": model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": tokenizer.vocab_size, # len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + if tokenizer.do_lower_case and data_args.dataset_name != "librispeech_asr": + raise 
ValueError( + "Setting the tokenizer attribute `do_lower_case` to `True` converts all input strings to " + "uppercase prior to tokenization. This should only be done when the tokenizer is built on an uppercased corpus," + "i.e. for the dataset `librispeech_asr` only. If your dataset is not `librispeech_asr`, the tokenizer is mostly likely " + "built on an lowercased corpus. In this case, set `tokenizer.do_lower_case` to ``False`." + ) + + if training_args.precision == "full_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = True + elif training_args.precision == "half_mixed": + dtype = jnp.bfloat16 + training_args.mixed_precision = False + else: + dtype = jnp.float32 + training_args.mixed_precision = False + + model = FlaxWav2Vec2ForCTC.from_pretrained( + model_args.model_name_or_path, + config=config, + dtype=dtype, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + from_pt=True, + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * feature_extractor.sampling_rate) + min_input_length = int(data_args.min_duration_in_seconds * feature_extractor.sampling_rate) + max_target_length = data_args.max_label_length + min_target_length = data_args.min_label_length + pad_input_to_multiple_of = data_args.pad_input_to_multiple_of + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + model_input_name = feature_extractor.model_input_names[0] + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + chars_to_ignore = ', ? . ! 
- ; : " “ % ‘ ” ?'.split(" ") + chars_to_ignore_regex = f'[{"".join(chars_to_ignore)}]' + # gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + # gigaspeech_disfluencies = ["", ""] + # swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + # "[vocalized-noise]", "_1"] + # swb_punctuations = ["{", "}", "[", "]-", "]"] + # earnings_disfluencies = ["", "", "", "inaudible", "", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets[data_args.train_split_name] = raw_datasets[data_args.train_split_name].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets[data_args.eval_split_name] = raw_datasets[data_args.eval_split_name].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_test_samples is not None: + raw_datasets[data_args.test_split_name] = raw_datasets[data_args.test_split_name].select(range(data_args.max_eval_samples)) + + if training_args.do_train and data_args.remove_punctuation: + + def remove_punctuation(batch): + batch[text_column_name] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("'", "").replace('"', "") + ) + + raw_datasets[data_args.train_split_name] = raw_datasets[data_args.train_split_name].map( + remove_punctuation, + num_proc=data_args.preprocessing_num_workers, + desc="removing punctuation from train split", + ) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # 
process audio + try: + sample = batch[audio_column_name] + except ValueError: + sample = {"array": np.array([0.]), "sampling_rate": feature_extractor.sampling_rate} + inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"]) + # process audio length + batch[model_input_name] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # process targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # if dataset_name == "google/xtreme_s": + # # Finally, we tokenize the processed text + # batch["labels"] = tokenizer(input_str).input_ids + # batch["labels_length"] = len(batch["labels"]) + # return batch + + # # Common Voice 9 + # if input_str.startswith('"') and input_str.endswith('"'): + # # we can remove trailing quotation marks as they do not affect the transcription + # input_str = input_str[1:-1] + # # normalize quotation marks + # input_str = re.sub(r'["“”]', '"', input_str) + # # normalize apostrophes + # input_str = re.sub(r"[’']", "'", input_str) + # # normalize hyphens + # input_str = re.sub(r"[—–]", "-", input_str) + # # replace double quotation marks with single + # input_str = input_str.replace('""', '"') + # if dataset_name == "mozilla-foundation/common_voice_9_0" and len(input_str): + # # for CV9, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." 
+ + # # TEDLIUM-3 + # # delete the token from the text and replace spaced apostrophes with un-spaced + # input_str = input_str.replace("", "").replace(" '", "'") + + # # GigaSpeech + # for disfluency in gigaspeech_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # convert spelled out punctuation to symbolic form + # for punctuation, replacement in gigaspeech_punctuation.items(): + # input_str = input_str.replace(punctuation, replacement) + # if dataset_name == "speechcolab/gigaspeech" and len(input_str): + # # for GS, we'll normalize the text to always finish with punctuation + # if input_str[-1] not in [".", "?", "!"]: + # input_str = input_str + "." + + # # SWB + # for disfluency in swb_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # remove parenthesised text (test data only) + # input_str = re.sub("[\(].*?[\)]", "", input_str) + # for punctuation in swb_punctuations: + # input_str = input_str.replace(punctuation, "") + # # replace anomalous words with their correct transcriptions + # split_str = input_str.split("/") + # if len(split_str) > 1: + # input_str = " ".join( + # [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # # Earnings 22 + # for disfluency in earnings_disfluencies: + # input_str = input_str.replace(disfluency, "") + # # replace mal-formatted ellipsis + # input_str = input_str.replace("…", ".") + + # JIWER compliance + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # Finally, we tokenize the processed text + batch["labels"] = tokenizer(input_str).input_ids + batch["labels_length"] = len(batch["labels"]) + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess dataset", + ) + + # filter data with inputs shorter than min_input_length or longer than 
max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # filter data with targets shorter than min_target_length or longer than max_target_length + def is_labels_in_length_range(length): + return length > min_target_length # and length < max_target_length + + vectorized_datasets = vectorized_datasets.filter( + is_labels_in_length_range, + num_proc=num_workers, + input_columns=["labels_length"], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. Files cached at {cache}.") + return + + # 8. Load Metrics + wer_metric = load_metric("wer") + cer_metric = load_metric("cer") + + def compute_metrics(pred_ids: List[List[int]], label_ids: List[List[int]]): + padded_ids = np.where(np.asarray(label_ids) == -100, tokenizer.pad_token_id, np.asarray(label_ids)) + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(padded_ids, group_tokens=False) + + wer = wer_metric.compute(predictions=pred_str, references=label_str) + cer = cer_metric.compute(predictions=pred_str, references=label_str) + + return {"wer": wer, "cer": cer}, pred_str, label_str + + # 9. 
save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + processor = AutoProcessor.from_pretrained(training_args.output_dir) + + data_collator = FlaxDataCollatorSpeechSeq2SeqWithPadding( + processor=processor, + input_padding="longest", + pad_input_to_multiple_of=pad_input_to_multiple_of, + max_label_length=data_args.max_label_length, + ) + + # Enable tensorboard only on the master node + has_tensorboard = is_tensorboard_available() + if has_tensorboard and jax.process_index() == 0: + try: + from flax.metrics.tensorboard import SummaryWriter + + summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir)) + except ImportError as ie: + has_tensorboard = False + logger.warning( + f"Unable to display metrics through TensorBoard because some package are not installed: {ie}" + ) + else: + logger.warning( + "Unable to display metrics through TensorBoard because the package is not installed: " + "Please run `pip install tensorboard` to enable." + ) + + # 10. Handle the repository creation + if training_args.push_to_hub: + with open(os.path.join(training_args.output_dir, ".gitattributes"), "r+") as f: + git_lfs_extensions = f.read() + if "*.wandb" not in git_lfs_extensions: + f.write("*.wandb filter=lfs diff=lfs merge=lfs -text") + if training_args.hub_model_id is None: + repo_name = get_full_repo_name( + Path(training_args.output_dir).absolute().name, token=training_args.hub_token + ) + else: + repo_name = training_args.hub_model_id + repo = Repository(training_args.output_dir, clone_from=repo_name) + + # 11. 
Initialize our training + rng = jax.random.PRNGKey(training_args.seed) + rng, dropout_rng = jax.random.split(rng) + + # Store some constants + max_steps = int(training_args.max_steps) + gradient_accumulation_steps = int(training_args.gradient_accumulation_steps) + train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count() + batch_size_per_update = train_batch_size * gradient_accumulation_steps + per_device_eval_batch_size = int(training_args.per_device_eval_batch_size) + eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count() + to_dtype = to_bf16 if training_args.mixed_precision else to_fp32 + + if training_args.do_train: + num_train_samples = len(vectorized_datasets[data_args.train_split_name]) + steps_per_epoch = num_train_samples // batch_size_per_update + if max_steps > 0: + num_epochs = -(training_args.max_steps // -steps_per_epoch) + total_train_steps = max_steps + else: + num_epochs = int(training_args.num_train_epochs) + total_train_steps = steps_per_epoch * num_epochs + + # Create learning rate schedule + # Create learning rate schedule + linear_decay_lr_schedule_fn = create_learning_rate_fn( + total_train_steps, + training_args.warmup_steps, + training_args.learning_rate, + ) + + # We use Optax's "masking" functionality to not apply weight decay + # to bias and LayerNorm scale parameters. decay_mask_fn returns a + # mask boolean with the same structure as the parameters. + # The mask is True for parameters that should be decayed. + # Note that this mask is specifically adapted for FlaxWav2Vec2 and FlaxBart. + # For FlaxT5, one should correct the layer norm parameter naming + # accordingly - see `run_t5_mlm_flax.py` e.g. 
+ def decay_mask_fn(params): + flat_params = traverse_util.flatten_dict(params) + layer_norm_params = [ + (name, "scale") + for name in ["layer_norm", "self_attn_layer_norm", "layernorm_embedding", "final_layer_norm"] + ] + flat_mask = {path: (path[-1] != "bias" and path[-2:] not in layer_norm_params) for path in flat_params} + return traverse_util.unflatten_dict(flat_mask) + + if training_args.adafactor: + # Create Adafactor optimizer + optim = optax.adafactor( + learning_rate=linear_decay_lr_schedule_fn, + dtype_momentum=jnp.bfloat16 if training_args.mixed_precision else jnp.float32, + weight_decay_rate=training_args.weight_decay, + weight_decay_mask=decay_mask_fn, + ) + else: + # Create AdamW optimizer + optim = optax.adamw( + learning_rate=linear_decay_lr_schedule_fn, + b1=training_args.adam_beta1, + b2=training_args.adam_beta2, + eps=training_args.adam_epsilon, + weight_decay=training_args.weight_decay, + mask=decay_mask_fn, + ) + + # Optax MultiSteps for gradient accumulation. We'll only call this optimizer transformation if gradient accumulation is required (i.e. 
gradient accumulation steps > 1) + if training_args.multisteps and gradient_accumulation_steps > 1: + optim = optax.MultiSteps(optim, gradient_accumulation_steps, use_grad_mean=False) + else: + num_epochs = 0 + total_train_steps = 0 + num_train_samples = 0 + optim = None + + # Setup train state + state = MixedPrecisionTrainState.create( + apply_fn=model.__call__, + get_attention_mask_fn=model._get_feature_vector_attention_mask, + params=model.params, + tx=optim, + to_dtype=to_dtype, + dropout_rng=dropout_rng, + max_grad_norm=training_args.max_grad_norm, + ) + + # Replicate the train state on each device + state = state.replicate() + blank_id = model.config.pad_token_id + + # Define gradient update step fn + def train_step(state, batch): + # only one single rng per grad step, with or without accumulation, as the graph should be identical over one effective training batch + dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng) + + def compute_loss(params, minibatch): + labels = minibatch.pop("labels") + logits = state.apply_fn( + **minibatch, + params=params, + dropout_rng=dropout_rng, + freeze_feature_encoder=model_args.freeze_feature_encoder, + train=True, + )[0] + logits_mask = state.get_attention_mask_fn(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + return loss + + grad_fn = jax.value_and_grad(compute_loss) + + if gradient_accumulation_steps == 1 or training_args.multisteps: + loss, grad = grad_fn(to_dtype(state.params), batch) + + # Custom gradient accumulation + else: + # add a first dimension over gradient_accumulation_steps for minibatch slices + batch = jax.tree_map( + lambda x: x.reshape( + gradient_accumulation_steps, training_args.per_device_train_batch_size, *x.shape[1::] + ), + batch, + ) + + def accum_minibatch_step(accum_grad, minibatch): + # compute loss, num labels and grad over minibatch and accumulate + loss, grad = grad_fn(to_dtype(state.params), 
minibatch) + return jax.tree_map(jnp.add, accum_grad, grad), loss + + # create an initial state for accumulating losses, num labels and gradients + init_grad = jax.tree_map(jnp.zeros_like, to_dtype(state.params)) + # loop accum minibatch step over the number of gradient accumulation steps + grad, loss = jax.lax.scan(accum_minibatch_step, init_grad, batch) + + # update state + new_state = state.apply_gradients( + grads=grad, + dropout_rng=new_dropout_rng, + to_dtype=to_dtype, + ) + + # compute gradient norms over all layers and globally for detailed monitoring + layer_grad_norm = jax.tree_map(jnp.linalg.norm, grad) + logs = { + "layer_grad_norm": layer_grad_norm, + "grad_norm": jnp.linalg.norm(jax.tree_util.tree_leaves(layer_grad_norm)), + } + + # compute parameter norms over all layers and globally for detailed monitoring + layer_param_norm = jax.tree_map(jnp.linalg.norm, new_state.params) + logs["layer_param_norm"] = layer_param_norm + logs["param_norm"] = jnp.linalg.norm(jax.tree_util.tree_leaves(layer_param_norm)) + + metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)} + metrics.update(logs) + + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + + return new_state, metrics + + # Define eval fn + def eval_step(params, batch): + labels = batch.pop("labels") + logits = model(**batch, params=params, train=False)[0] + + logits_mask = model._get_feature_vector_attention_mask(logits.shape[1], batch["attention_mask"]) + loss = ctc_loss(logits, logits_mask, labels, blank_id, loss_reduction="mean") + + pred_ids = jnp.argmax(logits, axis=-1) + + # summarize metrics + metrics = {"loss": loss} + metrics = jax.lax.pmean(metrics, axis_name="batch") + # metrics = to_fp32(metrics) + return metrics, pred_ids + + # Create parallel version of the train and eval step + if training_args.do_train: + p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,)) + + if training_args.do_eval: + p_eval_step = 
jax.pmap(eval_step, "batch") + + def run_evaluation(step): + if training_args.do_eval: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the eval dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[data_args.eval_split_name], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc="Evaluating ...", position=2)): + samples = [vectorized_datasets[data_args.eval_split_name][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... 
({step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, step, prefix="eval") + write_wandb_pred(pred_str, label_str, step) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, step, pred_str=pred_str) + + def save_checkpoint(step): + # save and push checkpoint to the hub + if jax.process_index() == 0: + params = jax.device_get(jax.tree_map(lambda x: x[0], state.params)) + model.save_pretrained(training_args.output_dir, params=params) + tokenizer.save_pretrained(training_args.output_dir) + if training_args.push_to_hub: + repo.push_to_hub(commit_message=f"{wandb.run.id}: saving weights and logs of step {int(step / 1000)}k", blocking=False) + + logger.info("***** Running training *****") + logger.info(f" Num examples = {num_train_samples}") + logger.info(f" Num Epochs = {num_epochs}") + logger.info(f" Instantaneous batch size per device = {training_args.per_device_train_batch_size}") + logger.info(f" Num gradient accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total train batch size (w. parallel & distributed) = {batch_size_per_update}") + logger.info(f" Total optimization steps = {total_train_steps}") + logger.info(f" Gradient checkpointing: {config.gradient_checkpointing}") + logger.info(f" Use scan: {config.use_scan}") + logger.info(f" Fuse matmuls: {config.fuse_matmuls}") + + train_time = cur_step = 0 + epochs = tqdm(range(num_epochs), desc=f"Epoch ... 
(1/{num_epochs})", position=0) + for epoch in epochs: + if training_args.do_train: + # ======================== Training ================================ + train_start = time.time() + + # Create sampling rng + rng, input_rng = jax.random.split(rng) + + # Generate an epoch by randomly shuffling sampling indices from the train dataset and grouping by length + train_samples_idx = get_grouped_indices(vectorized_datasets[data_args.train_split_name], batch_size_per_update, input_rng) + train_batch_idx = generate_batch_splits(train_samples_idx, batch_size_per_update) + + # Gather the indices for creating the batch and do a training step + for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1), 1): + samples = [vectorized_datasets[data_args.train_split_name][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + batch = shard(batch.data) + try: + state, train_metric = p_train_step(state, batch) + except TypeError as e: + logger.warning("Encountered following error: \n", e) + + cur_step = epoch * (num_train_samples // batch_size_per_update) + step + + if cur_step % training_args.logging_steps == 0: + # Save metrics + train_metric = unreplicate(train_metric) + train_time += time.time() - train_start + # need to upcast all device arrays to fp32 for wandb logging (jnp.bfloat16 not supported) -> do this here OR in train_step + write_wandb_log(to_fp32(train_metric), cur_step, prefix=data_args.train_split_name) + # we won't log to tensorboard for now (it is fiddly logging param and grad norms on a layer-by-layer basis) + # if has_tensorboard and jax.process_index() == 0: + # write_train_metric(summary_writer, train_metrics, train_time, cur_step) + + epochs.write( + f"Step... 
({cur_step} | Loss: {train_metric['loss']}, Learning Rate: {train_metric['learning_rate']}, Gradient Norm: {train_metric['grad_norm']})" + ) + + if cur_step % total_train_steps == 0: + break + + if training_args.eval_steps and cur_step % training_args.eval_steps == 0: + run_evaluation(cur_step) + + if cur_step % training_args.save_steps == 0: + save_checkpoint(cur_step) + + if training_args.eval_steps == 0 and (epoch + 1) != num_epochs: + # run evaluation at the end of the epoch if eval steps are not specified + run_evaluation(cur_step) + save_checkpoint(cur_step) + + if training_args.do_train: + save_checkpoint(cur_step) + + cur_step = max_steps if max_steps > 0 else cur_step # set step to max steps so that eval happens in alignment with training + + if training_args.do_eval: + run_evaluation(cur_step) + + # TODO: collapse 'do_predict' into the run_evaluation function + if training_args.do_predict: + for split in [data_args.test_split_name]: + # ======================== Evaluating ============================== + eval_metrics = [] + eval_preds = [] + eval_labels = [] + + # Generate eval set by sequentially sampling indices from the test dataset and grouping by length + eval_samples_idx = get_grouped_indices(vectorized_datasets[split], eval_batch_size) + eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size, drop_last=False) + + for i, batch_idx in enumerate(tqdm(eval_batch_idx, desc=f"Predicting {split}...", position=2)): + samples = [vectorized_datasets[split][int(idx)] for idx in batch_idx] + batch = data_collator(samples) + labels = batch["labels"] + + metrics, pred_ids = pad_shard_unpad(p_eval_step)(state.params, batch.data, min_device_batch=per_device_eval_batch_size) + eval_preds.extend(jax.device_get(pred_ids.reshape(-1, pred_ids.shape[-1]))) + eval_metrics.append(metrics) + + eval_labels.extend(labels) + + # normalize eval metrics + eval_metrics = get_metrics(eval_metrics) + eval_metrics = jax.tree_map(jnp.mean, eval_metrics) + 
eval_metrics = to_fp32(eval_metrics) + + # always run compute metrics + error_rate_metric, pred_str, label_str = compute_metrics(eval_preds, eval_labels) + eval_metrics.update(error_rate_metric) + error_rate_desc = " ".join([f"Eval {key}: {value} |" for key, value in error_rate_metric.items()]) + + # Print metrics and update progress bar + desc = f"Step... ({cur_step}/{total_train_steps} | Eval Loss: {eval_metrics['loss']} | {error_rate_desc})" + epochs.write(desc) + epochs.desc = desc + + # Save metrics + write_wandb_log(eval_metrics, cur_step, prefix=split) + write_wandb_pred(pred_str, label_str, cur_step, prefix=split) + # if has_tensorboard and jax.process_index() == 0: + # write_eval_metric(summary_writer, eval_metrics, cur_step, pred_str=pred_str) + + +if __name__ == "__main__": + main() diff --git a/wandb/run-20220730_174606-j2u4n7h4/files/config.yaml b/wandb/run-20220730_174606-j2u4n7h4/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..08ae650be81ea946dac3ad521619cd456d438730 --- /dev/null +++ b/wandb/run-20220730_174606-j2u4n7h4/files/config.yaml @@ -0,0 +1,27 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + cli_version: 0.12.9 + code_path: code/run_flax_speech_recognition_ctc.py + framework: huggingface + huggingface_version: 4.21.0 + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.8.10 + start_time: 1659203166 + t: + 1: + - 1 + - 2 + - 3 + - 11 + - 12 + 3: + - 13 + 4: 3.8.10 + 5: 0.12.9 + 6: 4.21.0 + 8: + - 5 diff --git a/wandb/run-20220730_174606-j2u4n7h4/files/diff.patch b/wandb/run-20220730_174606-j2u4n7h4/files/diff.patch new file mode 100644 index 0000000000000000000000000000000000000000..39b46e1762f7e4b4f330f4ff487b597cc5496642 --- /dev/null +++ b/wandb/run-20220730_174606-j2u4n7h4/files/diff.patch @@ -0,0 +1,10 @@ +diff --git a/.gitattributes b/.gitattributes +index 755356a..f0eef4b 100644 +--- a/.gitattributes ++++ b/.gitattributes +@@ -29,3 +29,4 @@ saved_model/**/* filter=lfs 
diff=lfs merge=lfs -text + *.zip filter=lfs diff=lfs merge=lfs -text + *.zstandard filter=lfs diff=lfs merge=lfs -text + *tfevents* filter=lfs diff=lfs merge=lfs -text ++*.wandb filter=lfs diff=lfs merge=lfs -text +\ No newline at end of file diff --git a/wandb/run-20220730_174606-j2u4n7h4/files/output.log b/wandb/run-20220730_174606-j2u4n7h4/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..816e7ad2c23d6e51ba7659b648665cae3ac597b4 --- /dev/null +++ b/wandb/run-20220730_174606-j2u4n7h4/files/output.log @@ -0,0 +1,2576 @@ +INFO:__main__:Training/evaluation parameters FlaxTrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +gradient_accumulation_steps=1, +gradient_checkpointing=True, +greater_is_better=None, +group_by_length=True, +half_precision_backend=auto, +hub_model_id=NbAiLab/wav2vec2-1b-npsc-nst, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0001, +length_column_name=input_length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul30_17-46-02_t1v-n-eedfb410-w-0, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=100, 
+logging_strategy=steps, +lr_scheduler_type=linear, +matmul_precision=bfloat16, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +multisteps=False, +no_cuda=False, +num_train_epochs=40.0, +optim=adamw_hf, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=12, +per_device_train_batch_size=12, +precision=full_mixed, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tf32=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=4000, +weight_decay=0.0, +xpu_backend=None, +) +INFO:__main__:JAX devices: 8, matmul precision: bfloat16 +WARNING:datasets.builder:Reusing dataset nst (/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53) +100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 78.35it/s] +WARNING:datasets.builder:Reusing dataset npsc (/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc) +100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 467.63it/s] 
+WARNING:datasets.arrow_dataset:Loading cached split indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a18d58183d9bf996.arrow and /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f883e246d28776da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1ecb897badea6b99.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e8c53f9b4a092be2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-99016c8af960e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d7675bb64e8cbb95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-35cb526c6e844fe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ef6aa8735c2e25f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b96eb3221fd7bdcd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f88d447c9e96a29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1db3024fc21398d0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fe9bf9aa3972dc9e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d47ebd3444326a96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-100513a9bb58a7d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eae23efdd20c9820.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-47204e714dab1e26.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ef97747360cf8f77.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-849c0d5e70b1eae6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9ee4d61ca1de5fd3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64d9a4db7163286d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-93d3f904dbd9dfed.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-048d205a760fb7b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8252e7452ed22a3f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-b46b71c0a44ac025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-981266ba1dfee0dd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-836e0b13e6c79682.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4c07ad0c6e9209a9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6755dbc96791ea74.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-30999bac01ddf169.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-88ce81bdaf3537c7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4462dee818c7228a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-dbd3462f0b7ec1ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2c1eabbcdb92ac67.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8c4ca51a902b3378.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-39cf8874cdcb5fad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-72a154bf3995be4e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-83a401ee1a5ae4b0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f7c303b13c9787f5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-deb16df6d6f11098.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e2ed8b7ee6a49bbf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a89f8a703c382829.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ad6ae0c2c5b5db00.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-718dab4c699166bc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07acbb44d2332ddf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-aa82cb01bcd0315e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a02927894152e700.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7bb336297bc0fe6b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c453683ef7d1f91f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-87430deb9a558175.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-364293ab4f931379.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-8d53639da64e470b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e51ebb4af271a8d2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0f77b97508d35aa2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6ddb824270d7c1d3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4fe4b3c17e4ee8f6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-22b72f0bacf3763a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d44354918759d63c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-1a3934bd8e1c854c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-eadbb6bd5b728b54.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c366355744a6b4fb.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-fd746182fc420273.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-64ffdd3462151b96.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c050f920ab9f5bf5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-0e1e01c9f06c1bf2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-decafc8505e37441.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9a03142b724fdaef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-76579b4a85e95b63.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-c2a806b0458860dc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6a3cbf3c5b456cef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-37cabe610bd73f12.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-daa1f848623a5f8b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2691698209721423.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ba587113d73c0469.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b7a826ff62e9190.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-f48a7e48ca1be4a1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-ea1bea731e738d53.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-9bbb6bdad70a6bc4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-14e88ec571f8c10c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-12e5c26566ad2467.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7e04bd4017d30913.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-3623af720c33578b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-de63ddccbaaa2408.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5922723cdae1242a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6e64af507b54c63d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-6cc574fd29691232.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-71e0a09e89b40263.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-cdef3fde897c2328.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5346080ba33a9efa.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-d5b0c95ba6dddb6e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7b39c3af46b09ae0.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-973f3faadf8c59ce.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-31d5502f147ff08d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-e5065fe32e9be0e1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a74fe01a603aebe1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-07572c953bfc5d29.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-50df9810116a3f1c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4b2bc354a7716465.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-7298022e3f7e9c11.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-5f1b0cfc50c27c43.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-2a37559661ad05de.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-00ad27889f41a2e4.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-a84d3ea2f9c53bb3.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-860c257305cbd095.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___nst/no-close/1.0.0/c9a1b1da598ea4a1b584c09ff0e7b0e06974f08bd0329959417147f3f5866f53/cache-4107db6b55e886b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2a7d46aeb9705209.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5465b0578d4d1dd6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dd436d3509962c33.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81e279f107529ddd.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0f6d5e486066b438.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5196a627dcb5575b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c40a3b109e32fdcf.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0010944e837ede95.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-77d3c26a1d78844f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2ca09d43b867639.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cf1012298a4f080f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-22cdaa8b64a3143d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b42663e1783f7f2d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe2d3a4def8e2e27.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8bebcd60bda2ac82.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8b63b4e4da3cc4ca.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2c9c07d9c528c424.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-03d13a49f91a0350.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7f5b8eae60c52bd1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e383963499ecb6a8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c644a1d85fd2789f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-edafc7330613f669.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fe06f91fbf084a48.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bf654e25f5915f4.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-804ffcf68527c977.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f3c558563706248e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-686bbe2ae9f6115c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e37701abaca6e19d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ad3854e8f2fb2252.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d404e04fb3f77dff.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6994281014f7cb8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6eb6782ef81ab287.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2c09930a2e9c5d6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d2137ee267f7e063.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3ce2d95d3d7df934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b7d05394fb392e55.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b8e5af9229e97ab3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1f3b605719428947.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-24da4e84c07bc816.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-38b8ba74c4e2559c.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a0769aba5df331b7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-986ad0784997b447.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-abc305b67cf98c81.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-81fe433ae04ce1d9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b4043a33396e06ad.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-b2a4386334d34964.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e5e0f56bc0836ef6.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-0b4881a47596a8b5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8d88027dc513f3f2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2bbc98a1e85edcf7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-58671958e0bce2ab.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1b8d037b59bdfe44.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c6fddf4c4bdb3cf3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-6f9d5c03685a50b1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-c115605b8f1c44d8.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7267f62f29b4d8b2.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1224fb2796c62199.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-192f3cb60e9dbb91.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-53a889aa100b8e34.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5ec8414a7e29ed0b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-97b78bc6e204c38b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7130e0c7c26247e9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-74d6344ccf6f0513.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-eb64974c7f40c5d7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-60b4b6d34c00edc7.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-591ff73292ab64d5.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d7bb3d84d987ec16.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-093f253a3bc3c623.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-5d561e097b476d1e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-dfa3bd868fdb8264.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fb9da51706446e03.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-f2817e2a00de495b.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-8e3f84eb9a986f8e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-26c33f08cbef01da.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-65dd2d48770a670f.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4f35f67b714124ef.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-61a7b1dd733379c1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bd2063716b88c5e3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-1138c5a00fe2cdf9.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-fad78b72fcc70083.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-791904043c817c80.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-ecfeeb161e769e6d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3b44f9e190a56d08.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-121ec2910dd9950a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bc0a6f115b1e0c7d.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-2cb67d1b83b5483e.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-cfd2a5ebc43e35cc.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-84a9f5f352433666.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-9a7c8f6ad347a417.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-355e6a58a8699922.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3d57c8c4d698ef05.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-48d50bfb849e2ce3.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-a4175a5390dc6934.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-4b379c14df26aae1.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-d25b011318a9820a.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-e204efc6b9dec025.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at 
/home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-be94056ab8967994.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-169c7a04853fedfe.arrow +WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-3c975d98fca8b01e.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-72754550393bd27b.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-7bf74cab31d17a04.arrow +WARNING:datasets.arrow_dataset:Loading cached shuffled indices for dataset at /home/javierr/.cache/huggingface/datasets/NbAiLab___npsc/16K_mp3/1.0.0/7e6298330579d622e10a8d4637959161ad242d9af9212c25aaa958b55df675fc/cache-bde0796d68afa9b7.arrow +loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/config.json from cache at /home/javierr/.cache/huggingface/transformers/19f816c26d6fef49a4dfc0fc6b841c37792a250d2697d8432769f8af5698f1dc.90dd5f300087b6277c408283c36aefa2efb15afd0d3e210b3a7c3f3efc478d03 +Model config Wav2Vec2Config { + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 1024, + 
"contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "fuse_matmuls": false, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1280, + "initializer_range": 0.02, + "intermediate_size": 5120, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.075, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 48, + "num_negatives": 100, + "output_hidden_size": 1280, + "pad_token_id": 0, + "proj_codevector_dim": 1024, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.21.0", + "use_scan": false, + "use_weighted_layer_sum": false, + "vocab_size": 32, + "xvector_output_dim": 512 +} +loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/preprocessor_config.json from cache at 
/home/javierr/.cache/huggingface/transformers/a33f3a17d64af3dc7d6af9fe68e6fcbe153fd601b94de9e09ff27c6916beed02.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326 +Feature extractor Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} +loading file ./vocab.json +loading file ./tokenizer_config.json +loading file ./added_tokens.json +loading file ./special_tokens_map.json +Adding to the vocabulary +Adding to the vocabulary +loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-1b/resolve/main/pytorch_model.bin from cache at /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +Loading PyTorch weights from /home/javierr/.cache/huggingface/transformers/1b10cc2ca6452defe09996da7c9e2d4a9550e22295550c3659e8b1895c97921a.46e33ebecb25f07f270ccfa8896911dd7aeba6b646f8724d18117987926c98db +PyTorch checkpoint contains 965,514,752 parameters. +Some weights of the model checkpoint at facebook/wav2vec2-xls-r-1b were not used when initializing FlaxWav2Vec2ForCTC: {('quantizer', 'codevectors'), ('project_hid', 'kernel'), ('project_q', 'kernel'), ('project_hid', 'bias'), ('project_q', 'bias'), ('quantizer', 'weight_proj', 'kernel'), ('quantizer', 'weight_proj', 'bias')} +- This IS expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). +- This IS NOT expected if you are initializing FlaxWav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). 
+Some weights of FlaxWav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-1b and are newly initialized: {('lm_head', 'bias'), ('lm_head', 'kernel')} +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +/data/flax/lib/python3.8/site-packages/transformers/modeling_flax_utils.py:904: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. + param_dtypes = jax.tree_map(lambda x: x.dtype, state) +removing punctuation from train split #0: 18%|██████████████████████████████████▊ | 1683/9523 [00:00<00:00, 8421.50ex/s] +removing punctuation from train split #1: 8%|████████████████▏ | 781/9523 [00:00<00:01, 7808.41ex/s] +removing punctuation from train split #2: 9%|█████████████████▋ | 852/9523 [00:00<00:01, 8511.55ex/s] +removing punctuation from train split #3: 7%|████████████▉ | 621/9523 [00:00<00:01, 5886.15ex/s] +removing punctuation from train split #4: 0%| | 0/9523 [00:00 to the vocabulary +Adding to the vocabulary +2022-07-30 17:57:48.312630: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2022-07-30 17:57:48.312680: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303) +/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +WARNING:huggingface_hub.repository:/data/wav2vec2-1b-npsc-nst/./ is already a clone of https://huggingface.co/NbAiLab/wav2vec2-1b-npsc-nst. Make sure you pull the latest changes with `repo.git_pull()`. +run_flax_speech_recognition_ctc.py:337: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead. 
+ return jax.tree_map(lambda x: x.astype(jnp.bfloat16) if x.dtype == jnp.float32 else x, t) +INFO:__main__:***** Running training ***** +INFO:__main__: Num examples = 302693 +INFO:__main__: Num Epochs = 40 +INFO:__main__: Instantaneous batch size per device = 12 +INFO:__main__: Num gradient accumulation steps = 1 +INFO:__main__: Total train batch size (w. parallel & distributed) = 96 +INFO:__main__: Total optimization steps = 126120 +INFO:__main__: Gradient checkpointing: True +INFO:__main__: Use scan: False +INFO:__main__: Fuse matmuls: False +Epoch ... (1/40): 0%| | 0/40 [00:00